diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-19 07:13:33 -0600 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-19 07:13:33 -0600 |
commit | 5f6e430f931d245da838db3e10e918681207029b (patch) | |
tree | 6adc54a582652ae470ce95d9205f798568339ff0 /arch | |
parent | a6e3e6f138058ff184d8ef5064a033b3f5fee8f8 (diff) | |
parent | 980411a4d1bb925d28cd9e8d8301dc982ece788d (diff) |
Merge tag 'powerpc-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Add powerpc qspinlock implementation optimised for large system
scalability and paravirt. See the merge message for more details
- Enable objtool to be built on powerpc to generate mcount locations
- Use a temporary mm for code patching with the Radix MMU, so the
writable mapping is restricted to the patching CPU
- Add an option to build the 64-bit big-endian kernel with the ELFv2
ABI
- Sanitise user registers on interrupt entry on 64-bit Book3S
- Many other small features and fixes
Thanks to Aboorva Devarajan, Angel Iglesias, Benjamin Gray, Bjorn
Helgaas, Bo Liu, Chen Lifu, Christoph Hellwig, Christophe JAILLET,
Christophe Leroy, Christopher M. Riedl, Colin Ian King, Deming Wang,
Disha Goel, Dmitry Torokhov, Finn Thain, Geert Uytterhoeven, Gustavo A.
R. Silva, Haowen Bai, Joel Stanley, Jordan Niethe, Julia Lawall, Kajol
Jain, Laurent Dufour, Li zeming, Miaoqian Lin, Michael Jeanson, Nathan
Lynch, Naveen N. Rao, Nayna Jain, Nicholas Miehlbradt, Nicholas Piggin,
Pali Rohár, Randy Dunlap, Rohan McLure, Russell Currey, Sathvika
Vasireddy, Shaomin Deng, Stephen Kitt, Stephen Rothwell, Thomas
Weißschuh, Tiezhu Yang, Uwe Kleine-König, Xie Shaowen, Xiu Jianfeng,
XueBing Chen, Yang Yingliang, Zhang Jiaming, ruanjinjie, Jessica Yu,
and Wolfram Sang.
* tag 'powerpc-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (181 commits)
powerpc/code-patching: Fix oops with DEBUG_VM enabled
powerpc/qspinlock: Fix 32-bit build
powerpc/prom: Fix 32-bit build
powerpc/rtas: mandate RTAS syscall filtering
powerpc/rtas: define pr_fmt and convert printk call sites
powerpc/rtas: clean up includes
powerpc/rtas: clean up rtas_error_log_max initialization
powerpc/pseries/eeh: use correct API for error log size
powerpc/rtas: avoid scheduling in rtas_os_term()
powerpc/rtas: avoid device tree lookups in rtas_os_term()
powerpc/rtasd: use correct OF API for event scan rate
powerpc/rtas: document rtas_call()
powerpc/pseries: unregister VPA when hot unplugging a CPU
powerpc/pseries: reset the RCU watchdogs after a LPM
powerpc: Take in account addition CPU node when building kexec FDT
powerpc: export the CPU node count
powerpc/cpuidle: Set CPUIDLE_FLAG_POLLING for snooze state
powerpc/dts/fsl: Fix pca954x i2c-mux node names
cxl: Remove unnecessary cxl_pci_window_alignment()
selftests/powerpc: Fix resource leaks
...
Diffstat (limited to 'arch')
133 files changed, 3062 insertions, 1083 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2ca5418457ed..b8c4ac56bddc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 source "arch/powerpc/platforms/Kconfig.cputype" +config CC_HAS_ELFV2 + def_bool PPC64 && $(cc-option, -mabi=elfv2) + config 32BIT bool default y if PPC32 @@ -96,7 +99,7 @@ config LOCKDEP_SUPPORT config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPTION + depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS config GENERIC_HWEIGHT bool @@ -155,7 +158,6 @@ config PPC select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS - select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM @@ -239,6 +241,8 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES + select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL + select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS @@ -294,6 +298,9 @@ config PPC_BARRIER_NOSPEC default y depends on PPC_BOOK3S_64 || PPC_E500 +config PPC_HAS_LBARX_LHARX + bool + config EARLY_PRINTK bool default y @@ -529,6 +536,15 @@ config HOTPLUG_CPU Say N if you are unsure. +config INTERRUPT_SANITIZE_REGISTERS + bool "Clear gprs on interrupt arrival" + depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER + default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV + help + Reduce the influence of user register state on interrupt handlers and + syscalls through clearing user state from registers before handling + the exception. + config PPC_QUEUED_SPINLOCKS bool "Queued spinlocks" if EXPERT depends on SMP @@ -583,6 +599,24 @@ config KEXEC_FILE config ARCH_HAS_KEXEC_PURGATORY def_bool KEXEC_FILE +config PPC64_BIG_ENDIAN_ELF_ABI_V2 + bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)" + depends on PPC64 && CPU_BIG_ENDIAN + depends on CC_HAS_ELFV2 + depends on LD_IS_BFD && LD_VERSION >= 22400 + default n + help + This builds the kernel image using the "Power Architecture 64-Bit ELF + V2 ABI Specification", which has a reduced stack overhead and faster + function calls. This internal kernel ABI option does not affect + userspace compatibility. + + The V2 ABI is standard for 64-bit little-endian, but for big-endian + it is less well tested by kernel and toolchain. However some distros + build userspace this way, and it can produce a functioning kernel. + + This requires GCC and binutils 2.24 or newer. + config RELOCATABLE bool "Build a relocatable kernel" depends on PPC64 || (FLATMEM && (44x || PPC_85xx)) @@ -1012,19 +1046,6 @@ config PPC_SECVAR_SYSFS read/write operations on these variables. Say Y if you have secure boot enabled and want to expose variables to userspace. -config PPC_RTAS_FILTER - bool "Enable filtering of RTAS syscalls" - default y - depends on PPC_RTAS - help - The RTAS syscall API has security issues that could be used to - compromise system integrity. This option enforces restrictions on the - RTAS calls and arguments passed by userspace programs to mitigate - these issues. - - Say Y unless you know what you are doing and the filter is causing - problems for you. - endmenu config ISA_DMA_API diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts index d6858b7cd93f..9ea7942f914e 100644 --- a/arch/powerpc/boot/dts/fsl/t1024qds.dts +++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts @@ -151,7 +151,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index dbcd31cc35dc..270aaf631f2a 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -165,7 +165,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi index 615479732252..1c329f076f64 100644 --- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi @@ -268,7 +268,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index bfe1ed5be337..fc7bec5dcb90 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -128,7 +128,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi index db4139999b28..962c99941645 100644 --- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi @@ -135,7 +135,7 @@ }; i2c@118000 { - pca9547@77 { + i2c-mux@77 { compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi index ff87e67c70da..ecc3e8c7394c 100644 --- a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi @@ -138,7 +138,7 @@ }; i2c@118100 { - pca9546@77 { + i2c-mux@77 { compatible = "nxp,pca9546"; reg = <0x77>; }; diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index b69db1d275cd..269e930b3b0b 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -21,6 +21,14 @@ reg = <0x00000000 0x00000000 0x00000000 0x10000000>; }; + clocks { + sys_clk: litex_sys_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <100000000>; + }; + }; + cpus { #size-cells = <0x00>; #address-cells = <0x01>; @@ -141,6 +149,20 @@ litex,slot-size = <0x800>; interrupts = <0x11 0x1>; }; + + mmc@8040000 { + compatible = "litex,mmc"; + reg = <0x8042800 0x800 + 0x8041000 0x800 + 0x8040800 0x800 + 0x8042000 0x800 + 0x8041800 0x800>; + reg-names = "phy", "core", "reader", "writer", "irq"; + bus-width = <4>; + interrupts = <0x13 1>; + cap-sd-highspeed; + clocks = <&sys_clk>; + }; }; chosen { diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index 045af668e928..e9cda34a140e 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -69,6 +69,20 @@ interrupt-parent = <&gpio>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */ <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */ + #address-cells = <1>; + #size-cells = <0>; + + /* Local temperature sensor (SA56004ED internal) */ + channel@0 { + reg = <0>; + label = "board"; + }; + + /* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */ + channel@1 { + reg = <1>; + label = "cpu"; + }; }; /* DDR3 SPD/EEPROM */ diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts index b4f32740870e..aa62d08e97c2 100644 --- a/arch/powerpc/boot/dts/warp.dts +++ b/arch/powerpc/boot/dts/warp.dts @@ -258,14 +258,12 @@ }; power-leds { - compatible = "gpio-leds"; + compatible = "warp-power-leds"; green { gpios = <&GPIO1 0 0>; - default-state = "keep"; }; red { gpios = <&GPIO1 1 0>; - default-state = "keep"; }; }; diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 5bdd4dd20bbb..af04cea82b94 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -215,6 +215,11 @@ ld_version() }' } +ld_is_lld() +{ + ${CROSS}ld -V 2>&1 | grep -q LLD +} + # Do not include PT_INTERP segment when linking pie. Non-pie linking # just ignores this option. LD_VERSION=$(${CROSS}ld --version | ld_version) @@ -223,6 +228,14 @@ if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then nodl="--no-dynamic-linker" fi +# suppress some warnings in recent ld versions +nowarn="-z noexecstack" +if ! ld_is_lld; then + if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then + nowarn="$nowarn --no-warn-rwx-segments" + fi +fi + platformo=$object/"$platform".o lds=$object/zImage.lds ext=strip @@ -504,7 +517,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi @@ -581,7 +594,7 @@ ps3) # reached, then enter the system reset vector of the partially decompressed # image. No warning is issued. rm -f "$odir"/{otheros,otheros-too-big}.bld - size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1) + size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1) bld="otheros.bld" if [ $size -gt $((0x1000000)) ]; then bld="otheros-too-big.bld" diff --git a/arch/powerpc/include/asm/asm.h b/arch/powerpc/include/asm/asm.h new file mode 100644 index 000000000000..86f46b604e9a --- /dev/null +++ b/arch/powerpc/include/asm/asm.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_ASM_H +#define _ASM_POWERPC_ASM_H + +#define _ASM_PTR " .long " + +#endif /* _ASM_POWERPC_ASM_H */ diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index ba1743c52b56..4be572908124 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H +#include <linux/build_bug.h> + #define MMU_NO_CONTEXT (0) /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx @@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, { flush_tlb_page(vma, vmaddr); } + +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + BUILD_BUG(); +} + static inline void local_flush_tlb_mm(struct mm_struct *mm) { flush_tlb_mm(mm); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 751921f6db46..146287d9580f 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -65,56 +65,6 @@ extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, unsigned long flags); -static inline void hash__local_flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void hash__flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void hash__local_flush_all_mm(struct mm_struct *mm) -{ - /* - * There's no Page Walk Cache for hash, so what is needed is - * the same as flush_tlb_mm(), which doesn't really make sense - * with hash. So the only thing we could do is flush the - * entire LPID! Punt for now, as it's not being used. - */ - WARN_ON_ONCE(1); -} - -static inline void hash__flush_all_mm(struct mm_struct *mm) -{ - /* - * There's no Page Walk Cache for hash, so what is needed is - * the same as flush_tlb_mm(), which doesn't really make sense - * with hash. So the only thing we could do is flush the - * entire LPID! Punt for now, as it's not being used. - */ - WARN_ON_ONCE(1); -} - -static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void hash__flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void hash__flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void hash__flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ -} - struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 67655cd60545..dd39313242b4 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -47,8 +47,7 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_pmd_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_pmd_tlb_range(vma, start, end); } #define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE @@ -57,81 +56,65 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long end) { if (radix_enabled()) - return radix__flush_hugetlb_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_hugetlb_tlb_range(vma, start, end); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_tlb_range(vma, start, end); - return hash__flush_tlb_range(vma, start, end); + radix__flush_tlb_range(vma, start, end); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { if (radix_enabled()) - return radix__flush_tlb_kernel_range(start, end); - return hash__flush_tlb_kernel_range(start, end); + radix__flush_tlb_kernel_range(start, end); } static inline void local_flush_tlb_mm(struct mm_struct *mm) { if (radix_enabled()) - return radix__local_flush_tlb_mm(mm); - return hash__local_flush_tlb_mm(mm); + radix__local_flush_tlb_mm(mm); } static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { if (radix_enabled()) - return radix__local_flush_tlb_page(vma, vmaddr); - return hash__local_flush_tlb_page(vma, vmaddr); + radix__local_flush_tlb_page(vma, vmaddr); } -static inline void local_flush_all_mm(struct mm_struct *mm) +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) { if (radix_enabled()) - return radix__local_flush_all_mm(mm); - return hash__local_flush_all_mm(mm); + radix__local_flush_tlb_page_psize(mm, vmaddr, psize); } static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) - return radix__tlb_flush(tlb); - return hash__tlb_flush(tlb); + radix__tlb_flush(tlb); } #ifdef CONFIG_SMP static inline void flush_tlb_mm(struct mm_struct *mm) { if (radix_enabled()) - return radix__flush_tlb_mm(mm); - return hash__flush_tlb_mm(mm); + radix__flush_tlb_mm(mm); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { if (radix_enabled()) - return radix__flush_tlb_page(vma, vmaddr); - return hash__flush_tlb_page(vma, vmaddr); -} - -static inline void flush_all_mm(struct mm_struct *mm) -{ - if (radix_enabled()) - return radix__flush_all_mm(mm); - return hash__flush_all_mm(mm); + radix__flush_tlb_page(vma, vmaddr); } #else #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) -#define flush_all_mm(mm) local_flush_all_mm(mm) #endif /* CONFIG_SMP */ #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a4736355c2..ef42adb44aa3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -99,7 +99,8 @@ __label__ __label_warn_on; \ \ WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \ - unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ \ __label_warn_on: \ break; \ diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 05f246c0e36e..d0ea0571e79a 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ * the previous value stored there. */ +#ifndef CONFIG_PPC_HAS_LBARX_LHARX XCHG_GEN(u8, _local, "memory"); XCHG_GEN(u8, _relaxed, "cc"); XCHG_GEN(u16, _local, "memory"); XCHG_GEN(u16, _relaxed, "cc"); +#else +static __always_inline unsigned long +__xchg_u8_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 # __xchg_u8_local\n" +" stbcx. %3,0,%2 \n" +" bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned char *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u8_relaxed(u8 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lbarx %0,0,%2 # __xchg_u8_relaxed\n" +" stbcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_local(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 # __xchg_u16_local\n" +" sthcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*(volatile unsigned short *)p) + : "r" (p), "r" (val) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__xchg_u16_relaxed(u16 *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( +"1: lharx %0,0,%2 # __xchg_u16_relaxed\n" +" sthcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (val) + : "cc"); + + return prev; +} +#endif static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ (unsigned long)_x_, sizeof(*(ptr))); \ }) + /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. */ - +#ifndef CONFIG_PPC_HAS_LBARX_LHARX CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u8, _local, , , "memory"); CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); @@ -211,6 +278,168 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); CMPXCHG_GEN(u16, _local, , , "memory"); CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); CMPXCHG_GEN(u16, _relaxed, , , "cc"); +#else +static __always_inline unsigned long +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lbarx %0,0,%2 # __cmpxchg_u8\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b" + PPC_ATOMIC_EXIT_BARRIER + "\n\ +2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_local\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" stbcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + PPC_ATOMIC_ENTRY_BARRIER +"1: lharx %0,0,%2 # __cmpxchg_u16\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old, + unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_local\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" +" sthcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} +#endif static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 1c6316ec4b74..3f881548fb61 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -22,8 +22,6 @@ #define BRANCH_SET_LINK 0x1 #define BRANCH_ABSOLUTE 0x2 -DECLARE_STATIC_KEY_FALSE(init_mem_is_free); - /* * Powerpc branch instruction is : * diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 431ae2343022..4961fb38e438 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -21,23 +21,8 @@ #include <asm/param.h> #include <asm/firmware.h> -typedef u64 __nocast cputime_t; -typedef u64 __nocast cputime64_t; - -#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new) - #ifdef __KERNEL__ -/* - * Convert cputime <-> microseconds - */ -extern u64 __cputime_usec_factor; - -static inline unsigned long cputime_to_usecs(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_usec_factor); -} - -#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime) +#define cputime_to_nsecs(cputime) tb_to_ns(cputime) /* * PPC64 uses PACA which is task independent for storing accounting data while diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 86a14736c76c..51c744608f37 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk); +void suspend_breakpoints(void); +void restore_breakpoints(void); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 259b9dd5fe1c..91c049d51d0e 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,6 +10,13 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +/* Ignore unused weak functions which will have larger offsets */ +#ifdef CONFIG_MPROFILE_KERNEL +#define FTRACE_MCOUNT_MAX_OFFSET 12 +#elif defined(CONFIG_PPC32) +#define FTRACE_MCOUNT_MAX_OFFSET 8 +#endif + #ifndef __ASSEMBLY__ extern void _mcount(void); @@ -84,17 +91,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, * those. */ #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME -#ifdef CONFIG_PPC64_ELF_ABI_V1 -static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) -{ - /* We need to skip past the initial dot, and the __se_sys alias */ - return !strcmp(sym + 1, name) || - (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) || - (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) || - (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) || - (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4)); -} -#else static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { return !strcmp(sym, name) || @@ -103,7 +99,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) || (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4)); } -#endif /* CONFIG_PPC64_ELF_ABI_V1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ #if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 8abae463f6c1..95fd7f9485d5 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -79,7 +79,7 @@ #define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDED -46 -#define H_P1 -54 +#define H_ABORTED -54 #define H_P2 -55 #define H_P3 -56 #define H_P4 -57 @@ -100,7 +100,6 @@ #define H_COP_HW -74 #define H_STATE -75 #define H_IN_USE -77 -#define H_ABORTED -78 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 1a6c1ce17735..47d46712928a 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -11,64 +11,6 @@ */ #include <asm/hw_irq.h> -#else -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_IRQSOFF_TRACER -/* - * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, - * which is the stack frame here, we need to force a stack frame - * in case we came from user space. - */ -#define TRACE_WITH_FRAME_BUFFER(func) \ - mflr r0; \ - stdu r1, -STACK_FRAME_OVERHEAD(r1); \ - std r0, 16(r1); \ - stdu r1, -STACK_FRAME_OVERHEAD(r1); \ - bl func; \ - ld r1, 0(r1); \ - ld r1, 0(r1); -#else -#define TRACE_WITH_FRAME_BUFFER(func) \ - bl func; -#endif - -/* - * These are calls to C code, so the caller must be prepared for volatiles to - * be clobbered. - */ -#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on) -#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off) - -/* - * This is used by assembly code to soft-disable interrupts first and - * reconcile irq state. - * - * NB: This may call C code, so the caller must be prepared for volatiles to - * be clobbered. - */ -#define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACAIRQSOFTMASK(r13); \ - lbz __rB,PACAIRQHAPPENED(r13); \ - andi. __rA,__rA,IRQS_DISABLED; \ - li __rA,IRQS_DISABLED; \ - ori __rB,__rB,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQHAPPENED(r13); \ - bne 44f; \ - stb __rA,PACAIRQSOFTMASK(r13); \ - TRACE_DISABLE_INTS; \ -44: - -#else -#define TRACE_ENABLE_INTS -#define TRACE_DISABLE_INTS - -#define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,IRQS_DISABLED; \ - ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACAIRQSOFTMASK(r13); \ - stb __rA,PACAIRQHAPPENED(r13) -#endif #endif #endif diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index c8882d9b86c2..a36797938620 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -105,7 +105,7 @@ struct kvmppc_host_state { void __iomem *xive_tima_virt; u32 saved_xirr; u64 dabr; - u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */ + u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bfacf12784dd..eae9619b6190 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -1014,6 +1014,18 @@ static inline void kvmppc_fix_ee_before_entry(void) #endif } +static inline void kvmppc_fix_ee_after_exit(void) +{ +#ifdef CONFIG_PPC64 + /* Only need to enable IRQs by hard enabling them after this */ + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ALL_DISABLED); +#endif + + trace_hardirqs_off(); +} + + static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) { ulong ea; diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index b71b9582e754..b88d1d2cf304 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -4,6 +4,9 @@ #include <asm/types.h> +#define __ALIGN .align 2 +#define __ALIGN_STR ".align 2" + #ifdef CONFIG_PPC64_ELF_ABI_V1 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index c1ea270bb848..57f5017111f4 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -151,8 +151,8 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * nMMU and/or PSL need to be cleaned up. * * Both the 'copros' and 'active_cpus' counts are looked at in - * flush_all_mm() to determine the scope (local/global) of the - * TLBIs, so we need to flush first before decrementing + * radix__flush_all_mm() to determine the scope (local/global) + * of the TLBIs, so we need to flush first before decrementing * 'copros'. If this API is used by several callers for the * same context, it can lead to over-flushing. It's hopefully * not common enough to be a problem. @@ -164,7 +164,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * in-between. */ if (radix_enabled()) { - flush_all_mm(mm); + radix__flush_all_mm(mm); c = atomic_dec_if_positive(&mm->context.copros); /* Detect imbalance between add and remove */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index cb1ac02ae8ee..70edad44dff6 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -256,8 +256,14 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p num = number_of_cells_per_pte(pmd, new, huge); - for (i = 0; i < num; i++, entry++, new += SZ_4K) - *entry = new; + for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { + *entry++ = new; + if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { + *entry++ = new; + *entry++ = new; + *entry++ = new; + } + } return old; } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index d9067dfc531c..69c3a050a3d8 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -183,7 +183,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * cases, and 32-bit non-hash with 32-bit PTEs. */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) - ptep->pte = ptep->pte1 = ptep->pte2 = ptep->pte3 = pte_val(pte); + ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte); #else *ptep = pte; #endif diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index bdaf34ad41ea..9a2cf83ea4f1 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); +} + static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { start &= PAGE_MASK; @@ -58,6 +64,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void local_flush_tlb_mm(struct mm_struct *mm); extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int tsize, int ind); diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 753a2757bcd4..d2f44612f4b0 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -74,6 +74,25 @@ #define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) #define REST_GPR(n, base) REST_GPRS(n, n, base) +/* macros for handling user register sanitisation */ +#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS +#define SANITIZE_SYSCALL_GPRS() ZEROIZE_GPR(0); \ + ZEROIZE_GPRS(5, 12); \ + ZEROIZE_NVGPRS() +#define SANITIZE_GPR(n) ZEROIZE_GPR(n) +#define SANITIZE_GPRS(start, end) ZEROIZE_GPRS(start, end) +#define SANITIZE_NVGPRS() ZEROIZE_NVGPRS() +#define SANITIZE_RESTORE_NVGPRS() REST_NVGPRS(r1) +#define HANDLER_RESTORE_NVGPRS() +#else +#define SANITIZE_SYSCALL_GPRS() +#define SANITIZE_GPR(n) +#define SANITIZE_GPRS(start, end) +#define SANITIZE_NVGPRS() +#define SANITIZE_RESTORE_NVGPRS() +#define HANDLER_RESTORE_NVGPRS() REST_NVGPRS(r1) +#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */ + #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) #define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 631802999d59..e96c9b8c2a60 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -374,9 +374,18 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #endif -/* Check that a certain kernel stack pointer is valid in task_struct p */ -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes); +/* + * Check that a certain kernel stack pointer is a valid (minimum sized) + * stack frame in task_struct p. + */ +int validate_sp(unsigned long sp, struct task_struct *p); + +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. + */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes); /* * Prefetch macros. diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 2e82820fbd64..c0107d8ddd8c 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -85,6 +85,7 @@ struct of_drc_info { extern int of_read_drc_info_cell(struct property **prop, const __be32 **curval, struct of_drc_info *data); +extern unsigned int boot_cpu_node_count; /* * There are two methods for telling firmware what our capabilities are. diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 8a0d8fb35328..d503dbd7856c 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -425,10 +425,6 @@ static inline void *ps3_system_bus_get_drvdata( return dev_get_drvdata(&dev->core); } -/* These two need global scope for get_arch_dma_ops(). */ - -extern struct bus_type ps3_system_bus_type; - /* system manager */ struct ps3_sys_manager_ops { diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 714a35f0d425..73c22c579a79 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -60,29 +60,4 @@ static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) return pa; } -/* - * This is what we should always use. Any other lockless page table lookup needs - * careful audit against THP split. - */ -static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *hshift) -{ - pte_t *pte; - - VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); - VM_WARN(pgdir != current->mm->pgd, - "%s lock less page table lookup called on wrong mm\n", __func__); - pte = __find_linux_pte(pgdir, ea, is_thp, hshift); - -#if defined(CONFIG_DEBUG_VM) && \ - !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)) - /* - * We should not find huge page if these configs are not enabled. - */ - if (hshift) - WARN_ON(*hshift); -#endif - return pte; -} - #endif /* _ASM_POWERPC_PTE_WALK_H */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 2efec6d87049..0eb90a013346 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -97,8 +97,6 @@ struct pt_regs #endif -#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) - // Always displays as "REGS" in memory dumps #ifdef CONFIG_CPU_BIG_ENDIAN #define STACK_FRAME_REGS_MARKER ASM_CONST(0x52454753) @@ -120,16 +118,27 @@ struct pt_regs #define USER_REDZONE_SIZE 512 #define KERNEL_REDZONE_SIZE 288 -#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ -#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ - STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) -#define STACK_FRAME_MARKER 12 #ifdef CONFIG_PPC64_ELF_ABI_V2 #define STACK_FRAME_MIN_SIZE 32 +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16) +#define STACK_INT_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16) +#define STACK_INT_FRAME_MARKER STACK_FRAME_MIN_SIZE +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16) +#define STACK_SWITCH_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16) #else -#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +/* + * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save + * area. This parameter area is not used by calls to C from interrupt entry, + * so the second from last one of those is used for the frame marker. + */ +#define STACK_FRAME_MIN_SIZE 112 +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE +#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 16) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE #endif /* Size of dummy stack frame allocated when calling signal handler. */ @@ -140,17 +149,22 @@ struct pt_regs #define USER_REDZONE_SIZE 0 #define KERNEL_REDZONE_SIZE 0 -#define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ +#define STACK_FRAME_MIN_SIZE 16 #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ -#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) -#define STACK_FRAME_MARKER 2 -#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE +#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 8) +#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE) +#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 #endif /* __powerpc64__ */ +#define STACK_INT_FRAME_SIZE (KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE) +#define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long)) + #ifndef __ASSEMBLY__ #include <asm/paca.h> diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b676c4fb90fd..28a53fb69b38 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -2,83 +2,173 @@ #ifndef _ASM_POWERPC_QSPINLOCK_H #define _ASM_POWERPC_QSPINLOCK_H -#include <asm-generic/qspinlock_types.h> +#include <linux/compiler.h> +#include <asm/qspinlock_types.h> #include <asm/paravirt.h> -#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ +#ifdef CONFIG_PPC64 +/* + * Use the EH=1 hint for accesses that result in the lock being acquired. + * The hardware is supposed to optimise this pattern by holding the lock + * cacheline longer, and releasing when a store to the same memory (the + * unlock) is performed. + */ +#define _Q_SPIN_EH_HINT 1 +#else +#define _Q_SPIN_EH_HINT 0 +#endif -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_queued_spin_unlock(struct qspinlock *lock); +/* + * The trylock itself may steal. This makes trylocks slightly stronger, and + * makes locks slightly more efficient when stealing. + * + * This is compile-time, so if true then there may always be stealers, so the + * nosteal paths become unused. + */ +#define _Q_SPIN_TRY_LOCK_STEAL 1 -static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) -{ - if (!is_shared_processor()) - native_queued_spin_lock_slowpath(lock, val); - else - __pv_queued_spin_lock_slowpath(lock, val); -} +/* + * Put a speculation barrier after testing the lock/node and finding it + * busy. Try to prevent pointless speculation in slow paths. + * + * Slows down the lockstorm microbenchmark with no stealing, where locking + * is purely FIFO through the queue. May have more benefit in real workload + * where speculating into the wrong place could have a greater cost. + */ +#define _Q_SPIN_SPEC_BARRIER 0 -#define queued_spin_unlock queued_spin_unlock -static inline void queued_spin_unlock(struct qspinlock *lock) -{ - if (!is_shared_processor()) - smp_store_release(&lock->locked, 0); - else - __pv_queued_spin_unlock(lock); -} +#ifdef CONFIG_PPC64 +/* + * Execute a miso instruction after passing the MCS lock ownership to the + * queue head. Miso is intended to make stores visible to other CPUs sooner. + * + * This seems to make the lockstorm microbenchmark nospin test go slightly + * faster on POWER10, but disable for now. + */ +#define _Q_SPIN_MISO 0 +#else +#define _Q_SPIN_MISO 0 +#endif +#ifdef CONFIG_PPC64 +/* + * This executes miso after an unlock of the lock word, having ownership + * pass to the next CPU sooner. This will slow the uncontended path to some + * degree. Not evidence it helps yet. + */ +#define _Q_SPIN_MISO_UNLOCK 0 #else -extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#define _Q_SPIN_MISO_UNLOCK 0 #endif -static __always_inline void queued_spin_lock(struct qspinlock *lock) +/* + * Seems to slow down lockstorm microbenchmark, suspect queue node just + * has to become shared again right afterwards when its waiter spins on + * the lock field. + */ +#define _Q_SPIN_PREFETCH_NEXT 0 + +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { - u32 val = 0; + return READ_ONCE(lock->val); +} - if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) - return; +static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) +{ + return !lock.val; +} - queued_spin_lock_slowpath(lock, val); +static __always_inline int queued_spin_is_contended(struct qspinlock *lock) +{ + return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK); } -#define queued_spin_lock queued_spin_lock -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define SPIN_THRESHOLD (1<<15) /* not tuned */ +static __always_inline u32 queued_spin_encode_locked_val(void) +{ + /* XXX: make this use lock value in paca like simple spinlocks? */ + return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET); +} -static __always_inline void pv_wait(u8 *ptr, u8 val) +static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock) { - if (*ptr != val) - return; - yield_to_any(); - /* - * We could pass in a CPU here if waiting in the queue and yield to - * the previous CPU in the queue. - */ + u32 new = queued_spin_encode_locked_val(); + u32 prev; + + /* Trylock succeeds only when unlocked and no queued nodes */ + asm volatile( +"1: lwarx %0,0,%1,%3 # __queued_spin_trylock_nosteal \n" +" cmpwi 0,%0,0 \n" +" bne- 2f \n" +" stwcx. %2,0,%1 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (new), + "i" (_Q_SPIN_EH_HINT) + : "cr0", "memory"); + + return likely(prev == 0); } -static __always_inline void pv_kick(int cpu) +static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock) { - prod_cpu(cpu); + u32 new = queued_spin_encode_locked_val(); + u32 prev, tmp; + + /* Trylock may get ahead of queued nodes if it finds unlocked */ + asm volatile( +"1: lwarx %0,0,%2,%5 # __queued_spin_trylock_steal \n" +" andc. %1,%0,%4 \n" +" bne- 2f \n" +" and %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK), + "i" (_Q_SPIN_EH_HINT) + : "cr0", "memory"); + + return likely(!(prev & ~_Q_TAIL_CPU_MASK)); } -extern void __pv_init_lock_hash(void); +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + if (!_Q_SPIN_TRY_LOCK_STEAL) + return __queued_spin_trylock_nosteal(lock); + else + return __queued_spin_trylock_steal(lock); +} -static inline void pv_spinlocks_init(void) +void queued_spin_lock_slowpath(struct qspinlock *lock); + +static __always_inline void queued_spin_lock(struct qspinlock *lock) { - __pv_init_lock_hash(); + if (!queued_spin_trylock(lock)) + queued_spin_lock_slowpath(lock); } -#endif +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release(&lock->locked, 0); + if (_Q_SPIN_MISO_UNLOCK) + asm volatile("miso" ::: "memory"); +} -/* - * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, - * which was found to have performance problems if implemented with - * the preferred spin_begin()/spin_end() SMT priority pattern. Use the - * generic version instead. - */ +#define arch_spin_is_locked(l) queued_spin_is_locked(l) +#define arch_spin_is_contended(l) queued_spin_is_contended(l) +#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) +#define arch_spin_lock(l) queued_spin_lock(l) +#define arch_spin_trylock(l) queued_spin_trylock(l) +#define arch_spin_unlock(l) queued_spin_unlock(l) -#include <asm-generic/qspinlock.h> +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void); +#else +static inline void pv_spinlocks_init(void) { } +#endif #endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h deleted file mode 100644 index 6b60e7736a47..000000000000 --- a/arch/powerpc/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H -#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H - -EXPORT_SYMBOL(__pv_queued_spin_unlock); - -#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h new file mode 100644 index 000000000000..4766a7aa03cb --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_types.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H +#define _ASM_POWERPC_QSPINLOCK_TYPES_H + +#include <linux/types.h> +#include <asm/byteorder.h> + +typedef struct qspinlock { + union { + u32 val; + +#ifdef __LITTLE_ENDIAN + struct { + u16 locked; + u8 reserved[2]; + }; +#else + struct { + u8 reserved[2]; + u16 locked; + }; +#endif + }; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } } + +/* + * Bitfields in the lock word: + * + * 0: locked bit + * 1-14: lock holder cpu + * 15: lock owner or queuer vcpus observed to be preempted bit + * 16: must queue bit + * 17-31: tail cpu (+1) + */ +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +/* 0x00000001 */ +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 1 +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) + +/* 0x00007ffe */ +#define _Q_OWNER_CPU_OFFSET 1 +#define _Q_OWNER_CPU_BITS 14 +#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU) + +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + +/* 0x00008000 */ +#define _Q_SLEEPY_OFFSET 15 +#define _Q_SLEEPY_BITS 1 +#define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET) + +/* 0x00010000 */ +#define _Q_MUST_Q_OFFSET 16 +#define _Q_MUST_Q_BITS 1 +#define _Q_MUST_Q_VAL (1U << _Q_MUST_Q_OFFSET) + +/* 0xfffe0000 */ +#define _Q_TAIL_CPU_OFFSET 17 +#define _Q_TAIL_CPU_BITS 15 +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS) +#error "qspinlock does not support such large CONFIG_NR_CPUS" +#endif + +#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 56319aea646e..479a95cb2770 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -33,21 +33,6 @@ #define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ #define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ -/* - * In general to call RTAS use rtas_token("string") to lookup - * an RTAS token for the given string (e.g. "event-scan"). - * To actually perform the call use - * ret = rtas_call(token, n_in, n_out, ...) - * Where n_in is the number of input parameters and - * n_out is the number of output parameters - * - * If the "string" is invalid on this system, RTAS_UNKNOWN_SERVICE - * will be returned as a token. rtas_call() does look for this - * token and error out gracefully so rtas_call(rtas_token("str"), ...) - * may be safely used for one-shot calls to RTAS. - * - */ - /* RTAS event classes */ #define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index bd75872a6334..7dafca8e3f02 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -13,7 +13,7 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() -#ifndef CONFIG_PARAVIRT_SPINLOCKS +#ifndef CONFIG_PPC_QUEUED_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index d5f8a74ed2e8..40b01446cf75 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ #endif #ifdef CONFIG_PPC_QUEUED_SPINLOCKS -#include <asm-generic/qspinlock_types.h> +#include <asm/qspinlock_types.h> #include <asm-generic/qrwlock_types.h> #else #include <asm/simple_spinlock_types.h> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4ce2a4aa3985..d24a59a98c0c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -72,7 +72,7 @@ #endif #define STACK_PT_REGS_OFFSET(sym, val) \ - DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) + DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val)) int main(void) { @@ -167,9 +167,8 @@ int main(void) OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr); - /* Local pt_regs on stack for Transactional Memory funcs. */ - DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + - sizeof(struct pt_regs) + 16); + /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */ + DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); @@ -261,7 +260,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); + DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); @@ -418,21 +417,18 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets); OFFSET(KVM_SDR1, kvm, arch.sdr1); OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); - OFFSET(KVM_RADIX, kvm, arch.radix); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); - OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested); OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); #endif @@ -449,16 +445,12 @@ int main(void) OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); - OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); - OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); - OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); - OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); @@ -486,8 +478,6 @@ int main(void) OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr); OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop); OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); - OFFSET(VCPU_TID, kvm_vcpu, arch.tid); - OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr); OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); @@ -582,8 +572,6 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); - HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -594,9 +582,6 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); - HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); - HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); - HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); @@ -672,17 +657,6 @@ int main(void) OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif -#ifdef CONFIG_KVM_XICS - DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu, - arch.xive_saved_state)); - DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, - arch.xive_cam_word)); - DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); - DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on)); - DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr)); - DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr)); -#endif - #ifdef CONFIG_KVM_EXIT_TIMING OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8b5ff64b604..f29ce3dd6140 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -4,6 +4,8 @@ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) */ +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/cputable.h> @@ -81,7 +83,7 @@ _GLOBAL(__setup_cpu_745x) blr /* Enable caches for 603's, 604, 750 & 7400 */ -setup_common_caches: +SYM_FUNC_START_LOCAL(setup_common_caches) mfspr r11,SPRN_HID0 andi. r0,r11,HID0_DCE ori r11,r11,HID0_ICE|HID0_DCE @@ -95,11 +97,12 @@ setup_common_caches: sync isync blr +SYM_FUNC_END(setup_common_caches) /* 604, 604e, 604ev, ... * Enable superscalar execution & branch history table */ -setup_604_hid0: +SYM_FUNC_START_LOCAL(setup_604_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SIED|HID0_BHTE ori r8,r11,HID0_BTCD @@ -110,6 +113,7 @@ setup_604_hid0: sync isync blr +SYM_FUNC_END(setup_604_hid0) /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. @@ -125,13 +129,14 @@ setup_604_hid0: * needed once we have applied workaround #5 (though it's * not set by Apple's firmware at least). */ -setup_7400_workarounds: +SYM_FUNC_START_LOCAL(setup_7400_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0207 ble 1f blr -setup_7410_workarounds: +SYM_FUNC_END(setup_7400_workarounds) +SYM_FUNC_START_LOCAL(setup_7410_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0100 @@ -151,6 +156,7 @@ setup_7410_workarounds: sync isync blr +SYM_FUNC_END(setup_7410_workarounds) /* 740/750/7400/7410 * Enable Store Gathering (SGE), Address Broadcast (ABE), @@ -158,7 +164,7 @@ setup_7410_workarounds: * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) */ -setup_750_7400_hid0: +SYM_FUNC_START_LOCAL(setup_750_7400_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC oris r11,r11,HID0_DPM@h @@ -177,12 +183,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_750_7400_hid0) /* 750cx specific * Looks like we have to disable NAP feature for some PLL settings... * (waiting for confirmation) */ -setup_750cx: +SYM_FUNC_START_LOCAL(setup_750cx) mfspr r10, SPRN_HID1 rlwinm r10,r10,4,28,31 cmpwi cr0,r10,7 @@ -196,11 +203,13 @@ setup_750cx: andc r6,r6,r7 stw r6,CPU_SPEC_FEATURES(r4) blr +SYM_FUNC_END(setup_750cx) /* 750fx specific */ -setup_750fx: +SYM_FUNC_START_LOCAL(setup_750fx) blr +SYM_FUNC_END(setup_750fx) /* MPC 745x * Enable Store Gathering (SGE), Branch Folding (FOLD) @@ -212,7 +221,7 @@ setup_750fx: * Clear Instruction cache throttling (ICTC) * Enable L2 HW prefetch */ -setup_745x_specifics: +SYM_FUNC_START_LOCAL(setup_745x_specifics) /* We check for the presence of an L3 cache setup by * the firmware. If any, we disable NAP capability as * it's known to be bogus on rev 2.1 and earlier @@ -270,6 +279,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_745x_specifics) /* * Initialize the FPU registers. This is needed to work around an errata diff --git a/arch/powerpc/kernel/cpu_setup_e500.S b/arch/powerpc/kernel/cpu_setup_e500.S index 2ab25161b0ad..077cfccc3461 100644 --- a/arch/powerpc/kernel/cpu_setup_e500.S +++ b/arch/powerpc/kernel/cpu_setup_e500.S @@ -8,6 +8,8 @@ * Benjamin Herrenschmidt <benh@kernel.crashing.org> */ +#include <linux/linkage.h> + #include <asm/page.h> #include <asm/processor.h> #include <asm/cputable.h> @@ -274,7 +276,7 @@ _GLOBAL(flush_dcache_L1) blr -has_L2_cache: +SYM_FUNC_START_LOCAL(has_L2_cache) /* skip L2 cache on P2040/P2040E as they have no L2 cache */ mfspr r3, SPRN_SVR /* shift right by 8 bits and clear E bit of SVR */ @@ -290,9 +292,10 @@ has_L2_cache: 1: li r3, 0 blr +SYM_FUNC_END(has_L2_cache) /* flush backside L2 cache */ -flush_backside_L2_cache: +SYM_FUNC_START_LOCAL(flush_backside_L2_cache) mflr r10 bl has_L2_cache mtlr r10 @@ -313,6 +316,7 @@ flush_backside_L2_cache: bne 1b 2: blr +SYM_FUNC_END(flush_backside_L2_cache) _GLOBAL(cpu_down_flush_e500v2) mflr r0 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3fc7c9886bb7..5604c9a1ac22 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -18,6 +18,8 @@ #include <linux/err.h> #include <linux/sys.h> #include <linux/threads.h> +#include <linux/linkage.h> + #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -74,17 +76,18 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */ #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) - .globl __kuep_lock -__kuep_lock: +SYM_FUNC_START(__kuep_lock) lwz r9, THREAD+THSR0(r2) update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_lock) -__kuep_unlock: +SYM_FUNC_START_LOCAL(__kuep_unlock) lwz r9, THREAD+THSR0(r2) rlwinm r9,r9,0,~SR_NX update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_unlock) .macro kuep_lock bl __kuep_lock @@ -114,7 +117,7 @@ transfer_to_syscall: addi r12,r12,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r1) li r2, INTERRUPT_SYSCALL - stw r12,8(r1) + stw r12,STACK_INT_FRAME_MARKER(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) SAVE_GPRS(3, 8, r1) @@ -123,12 +126,12 @@ transfer_to_syscall: kuep_lock /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 bl system_call_exception ret_from_syscall: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 bl syscall_exit_prepare #ifdef CONFIG_PPC_47x @@ -215,9 +218,9 @@ ret_from_kernel_thread: * in arch/ppc/kernel/process.c */ _GLOBAL(_switch) - stwu r1,-INT_FRAME_SIZE(r1) + stwu r1,-SWITCH_FRAME_SIZE(r1) mflr r0 - stw r0,INT_FRAME_SIZE+4(r1) + stw r0,SWITCH_FRAME_SIZE+4(r1) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ @@ -248,7 +251,7 @@ _GLOBAL(_switch) lwz r4,_NIP(r1) /* Return to _switch caller in new task */ mtlr r4 - addi r1,r1,INT_FRAME_SIZE + addi r1,r1,SWITCH_FRAME_SIZE blr .globl fast_exception_return @@ -293,7 +296,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl interrupt_return interrupt_return: lwz r4,_MSR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andi. r0,r4,MSR_PR beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3e2e37e6ecab..1bf1121e17f1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -14,6 +14,7 @@ * code, and exception/interrupt return code for PowerPC. */ +#include <linux/objtool.h> #include <linux/errno.h> #include <linux/err.h> #include <asm/cache.h> @@ -73,6 +74,7 @@ flush_branch_caches: // Flush the link stack .rept 64 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr b 1f diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2f68fb2ee4fc..3f86091e68b3 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -358,7 +358,6 @@ ret_from_mc_except: std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) - /* Core exception code for all exceptions except TLB misses. */ #define EXCEPTION_COMMON_LVL(n, scratch, excf) \ exc_##n##_common: \ @@ -391,10 +390,11 @@ exc_##n##_common: \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ - std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ std r0,RESULT(r1); /* clear regs->result */ \ - SAVE_NVGPRS(r1); + SAVE_NVGPRS(r1); \ + SANITIZE_NVGPRS(); /* minimise speculation influence */ #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) @@ -455,7 +455,7 @@ exc_##n##_bad_stack: \ EXCEPTION_COMMON(trapnum) \ ack(r8); \ CHECK_NAPPING(); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ + addi r3,r1,STACK_INT_FRAME_REGS; \ bl hdlr; \ b interrupt_return @@ -504,7 +504,7 @@ __end_interrupts: EXCEPTION_COMMON_CRIT(0x100) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -515,7 +515,7 @@ __end_interrupts: EXCEPTION_COMMON_MC(0x000) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception b ret_from_mc_except @@ -570,7 +570,7 @@ __end_interrupts: std r14,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) EXCEPTION_COMMON(0x700) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -586,7 +586,7 @@ __end_interrupts: beq- 1f bl load_up_fpu b fast_interrupt_return -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception b interrupt_return @@ -606,7 +606,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return @@ -616,7 +616,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION bl altivec_assist_exception @@ -643,7 +643,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0x9f0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else @@ -664,7 +664,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -731,7 +731,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) EXCEPTION_COMMON_CRIT(0xd00) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -802,7 +802,7 @@ kernel_dbg_exc: ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) EXCEPTION_COMMON_DBG(0xd08) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -812,7 +812,7 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) CHECK_NAPPING() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS /* * XXX: Returning from performance_monitor_exception taken as a * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return @@ -834,7 +834,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2a0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -846,7 +846,7 @@ kernel_dbg_exc: GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -857,7 +857,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2e0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -866,7 +866,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -875,7 +875,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -884,7 +884,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -979,7 +979,7 @@ masked_interrupt_book3e_0x2c0: * original values stashed away in the PACA */ storage_fault_common: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_page_fault b interrupt_return @@ -988,7 +988,7 @@ storage_fault_common: * continues here. */ alignment_more: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) b interrupt_return @@ -1069,7 +1069,7 @@ bad_stack_book3e: ZEROIZE_GPR(12) std r12,0(r11) LOAD_PACA_TOC() -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_bad_stack b 1b diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 651c36b056bd..6441a1ba57ac 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -13,6 +13,7 @@ * */ +#include <linux/linkage.h> #include <asm/hw_irq.h> #include <asm/exception-64s.h> #include <asm/ptrace.h> @@ -111,6 +112,7 @@ name: #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ +#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */ #define INT_DEFINE_BEGIN(n) \ .macro int_define_ ## n name @@ -176,6 +178,9 @@ do_define_int n .ifndef IKUAP IKUAP=1 .endif + .ifndef IMSR_R12 + IMSR_R12=0 + .endif .endm /* @@ -502,6 +507,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text) std r10,0(r1) /* make stack chain pointer */ std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + SANITIZE_GPR(0) /* Mark our [H]SRRs valid for return */ li r10,1 @@ -544,8 +550,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) + .if !IMSR_R12 + SANITIZE_GPRS(9, 12) + .else + SANITIZE_GPRS(9, 11) + .endif SAVE_NVGPRS(r1) + SANITIZE_NVGPRS() .if IDAR .if IISIDE @@ -577,8 +589,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) - std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ + SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */ + SANITIZE_GPRS(2, 8) mflr r9 /* Get LR, later save to stack */ LOAD_PACA_TOC() /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -591,7 +603,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) li r10,0 LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r10,RESULT(r1) /* clear regs->result */ - std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ + std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */ .endm /* @@ -696,6 +708,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 + SANITIZE_RESTORE_NVGPRS() REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ @@ -1061,7 +1074,7 @@ EXC_COMMON_BEGIN(system_reset_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl system_reset_exception /* Clear MSR_RI before setting SRR0 and SRR1. */ @@ -1208,7 +1221,7 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS BEGIN_FTR_SECTION bl machine_check_early_boot END_FTR_SECTION(0, 1) // nop out after boot @@ -1298,7 +1311,7 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception_async b interrupt_return_srr @@ -1364,14 +1377,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * This is the NMI version of the handler because we are called from * the early handler which is a true NMI. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception /* * We will not reach here. Even if we did, there is no way out. * Call unrecoverable_exception and die. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unrecoverable_exception b . @@ -1422,7 +1435,7 @@ EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andis. r0,r4,DSISR_DABRMATCH@h bne- 1f #ifdef CONFIG_PPC_64S_HASH_MMU @@ -1441,7 +1454,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. */ - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() b interrupt_return_srr @@ -1479,7 +1492,7 @@ EXC_COMMON_BEGIN(data_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1493,7 +1506,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_bad_segment_interrupt b interrupt_return_srr @@ -1525,7 +1538,7 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1567,7 +1580,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1581,7 +1594,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_bad_segment_interrupt b interrupt_return_srr @@ -1635,7 +1648,7 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_IRQ BEGIN_FTR_SECTION b interrupt_return_hsrr @@ -1665,9 +1678,9 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1731,9 +1744,9 @@ EXC_COMMON_BEGIN(program_check_common) __GEN_COMMON_BODY program_check .Ldo_program_check: - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1751,6 +1764,7 @@ INT_DEFINE_BEGIN(fp_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(fp_unavailable) EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) @@ -1762,7 +1776,7 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 @@ -1780,7 +1794,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl fp_unavailable_tm b interrupt_return_srr #endif @@ -1824,7 +1838,7 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl timer_interrupt b interrupt_return_srr @@ -1909,7 +1923,7 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else @@ -2076,7 +2090,7 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl single_step_exception b interrupt_return_srr @@ -2110,7 +2124,7 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS BEGIN_MMU_FTR_SECTION bl do_bad_page_fault_segv MMU_FTR_SECTION_ELSE @@ -2139,7 +2153,7 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return_hsrr @@ -2162,9 +2176,9 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl emulation_assist_interrupt - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2222,7 +2236,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) __GEN_COMMON_BODY hmi_exception_early - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl hmi_exception_realmode cmpdi cr0,r3,0 bne 1f @@ -2240,7 +2254,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl handle_hmi_exception b interrupt_return_hsrr @@ -2274,7 +2288,7 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else @@ -2310,7 +2324,7 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl do_IRQ b interrupt_return_hsrr @@ -2356,7 +2370,7 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS lbz r4,PACAIRQSOFTMASK(r13) cmpdi r4,IRQS_ENABLED bne 1f @@ -2384,6 +2398,7 @@ INT_DEFINE_BEGIN(altivec_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(altivec_unavailable) EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) @@ -2410,14 +2425,14 @@ BEGIN_FTR_SECTION b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_tm b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return_srr @@ -2433,6 +2448,7 @@ INT_DEFINE_BEGIN(vsx_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(vsx_unavailable) EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) @@ -2458,14 +2474,14 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl vsx_unavailable_tm b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl vsx_unavailable_exception b interrupt_return_srr @@ -2492,9 +2508,9 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2520,9 +2536,10 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl facility_unavailable_exception - REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ + /* XXX Shouldn't be necessary in practice */ + HANDLER_RESTORE_NVGPRS() b interrupt_return_hsrr @@ -2550,7 +2567,7 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_system_error_exception b interrupt_return_hsrr @@ -2581,7 +2598,7 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl instruction_breakpoint_exception b interrupt_return_srr @@ -2703,7 +2720,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return_hsrr @@ -2720,7 +2737,7 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_maintenance_exception b interrupt_return_hsrr @@ -2745,10 +2762,10 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) EXC_VIRT_END(altivec_assist, 0x5700, 0x100) EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC bl altivec_assist_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ #else bl unknown_exception #endif @@ -2767,7 +2784,7 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl cbe_thermal_exception b interrupt_return_hsrr @@ -2800,7 +2817,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl soft_nmi_interrupt /* Clear MSR_RI before setting SRR0 and SRR1. */ @@ -3124,7 +3141,7 @@ _GLOBAL(enable_machine_check) blr /* MSR[RI] should be clear because this uses SRR[01] */ -disable_machine_check: +SYM_FUNC_START_LOCAL(disable_machine_check) mflr r0 bcl 20,31,$+4 0: mflr r3 @@ -3137,3 +3154,4 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr +SYM_FUNC_END(disable_machine_check) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index c3286260a7d1..f8e2911478a7 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -112,7 +112,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r1) + stw r10,STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) @@ -127,7 +127,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 088f500896c7..3f68a1624646 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -28,6 +28,8 @@ #include <linux/init.h> #include <linux/pgtable.h> #include <linux/sizes.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -602,7 +604,7 @@ start_here: lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init /* We have to do this with MMU on */ @@ -662,7 +664,7 @@ start_here: * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. */ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) tlbia /* Invalidate all TLB entries */ isync @@ -711,6 +713,7 @@ initial_mmu: mtspr SPRN_EVPR,r0 blr +SYM_FUNC_END(initial_mmu) _GLOBAL(abort) mfspr r13,SPRN_DBCR0 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f15cb9fdb692..63a85c16fef4 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -109,7 +109,7 @@ _GLOBAL(_start); lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init @@ -1012,7 +1012,7 @@ _GLOBAL(start_secondary_47x) */ lis r1,temp_boot_stack@h ori r1,r1,temp_boot_stack@l - addi r1,r1,1024-STACK_FRAME_OVERHEAD + addi r1,r1,1024-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) bl mmu_init_secondary @@ -1025,7 +1025,7 @@ _GLOBAL(start_secondary_47x) lwz r1,TASK_STACK(r2) /* Current stack pointer */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index dedcc6fe2263..7558ba4eb864 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -18,6 +18,7 @@ * variants. */ +#include <linux/linkage.h> #include <linux/threads.h> #include <linux/init.h> #include <asm/reg.h> @@ -424,7 +425,7 @@ generic_secondary_common_init: /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) @@ -462,7 +463,7 @@ generic_secondary_common_init: * Assumes we're mapped EA == RA if the MMU is on. */ #ifdef CONFIG_PPC_BOOK3S -__mmu_off: +SYM_FUNC_START_LOCAL(__mmu_off) mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR beqlr @@ -473,6 +474,7 @@ __mmu_off: sync rfid b . /* prevent speculative execution */ +SYM_FUNC_END(__mmu_off) #endif @@ -780,7 +782,7 @@ _GLOBAL(pmac_secondary_start) /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE b __secondary_start @@ -869,7 +871,7 @@ _GLOBAL(start_secondary_resume) /* * This subroutine clobbers r11 and r12 */ -enable_64b_mode: +SYM_FUNC_START_LOCAL(enable_64b_mode) mfmsr r11 /* grab the current MSR */ #ifdef CONFIG_PPC_BOOK3E_64 oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ @@ -881,6 +883,7 @@ enable_64b_mode: isync #endif blr +SYM_FUNC_END(enable_64b_mode) /* * This puts the TOC pointer into r2, offset by 0x8000 (as expected @@ -958,7 +961,7 @@ start_here_multiplatform: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) + stdu r0,-STACK_FRAME_MIN_SIZE(r1) /* * Do very early kernel initializations, including initial hash table diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 52c0ab416326..d438ca74e96c 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -29,6 +29,8 @@ #include <linux/init.h> #include <linux/threads.h> #include <linux/pgtable.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -229,7 +231,7 @@ set_ivor: lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) #ifdef CONFIG_SMP stw r24, TASK_CPU(r2) @@ -885,7 +887,7 @@ KernelSPE: * Translate the effec addr in r3 to phys addr. The phys addr will be put * into r3(higher 32bit) and r4(lower 32bit) */ -get_phys_addr: +SYM_FUNC_START_LOCAL(get_phys_addr) mfmsr r8 mfspr r9,SPRN_PID rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ @@ -907,6 +909,7 @@ get_phys_addr: mfspr r3,SPRN_MAS7 #endif blr +SYM_FUNC_END(get_phys_addr) /* * Global functions @@ -972,10 +975,10 @@ _GLOBAL(__giveup_spe) li r4,THREAD_ACC evstddx evr6, r4, r3 /* save off accumulator */ beq 1f - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lwz r4,_MSR-STACK_INT_FRAME_REGS(r5) lis r3,MSR_SPE@h andc r4,r4,r3 /* disable SPE for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) + stw r4,_MSR-STACK_INT_FRAME_REGS(r5) 1: blr #endif /* CONFIG_SPE */ @@ -1044,7 +1047,7 @@ __secondary_start: lwz r1,TASK_STACK(r2) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b05f2be66b9..a79751e05781 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -18,6 +18,8 @@ #include <linux/magic.h> #include <linux/pgtable.h> #include <linux/sizes.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -537,7 +539,7 @@ start_here: ori r0, r0, STACK_END_MAGIC@l stw r0, 0(r1) li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) lis r6, swapper_pg_dir@ha tophys(r6,r6) @@ -625,7 +627,7 @@ start_here: * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. */ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ lis r10, MD_TWAM@h @@ -686,6 +688,7 @@ initial_mmu: #endif mtspr SPRN_DER, r8 blr +SYM_FUNC_END(initial_mmu) _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 519b60695167..c51f28b5abc0 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -18,6 +18,8 @@ #include <linux/init.h> #include <linux/pgtable.h> +#include <linux/linkage.h> + #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -840,7 +842,7 @@ __secondary_start: lwz r1,TASK_STACK(r1) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 tophys(r3,r1) stw r0,0(r3) @@ -877,7 +879,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ -early_hash_table: +SYM_FUNC_START_LOCAL(early_hash_table) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -888,8 +890,9 @@ early_hash_table: ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 blr +SYM_FUNC_END(early_hash_table) -load_up_mmu: +SYM_FUNC_START_LOCAL(load_up_mmu) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -918,6 +921,7 @@ BEGIN_MMU_FTR_SECTION LOAD_BAT(7,r3,r4,r5) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(load_up_mmu) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ @@ -966,7 +970,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) /* * Do early platform-specific initialization, * and set up the MMU. @@ -1028,7 +1032,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * this makes sure it's done. * -- Cort */ -clear_bats: +SYM_FUNC_START_LOCAL(clear_bats) li r10,0 mtspr SPRN_DBAT0U,r10 @@ -1072,6 +1076,7 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_IBAT7L,r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(clear_bats) _GLOBAL(update_bats) lis r4, 1f@h @@ -1108,15 +1113,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtspr SPRN_SRR1, r6 rfi -flush_tlbs: +SYM_FUNC_START_LOCAL(flush_tlbs) lis r10, 0x40 1: addic. r10, r10, -0x1000 tlbie r10 bgt 1b sync blr +SYM_FUNC_END(flush_tlbs) -mmu_off: +SYM_FUNC_START_LOCAL(mmu_off) addi r4, r3, __after_mmu_off - _start mfmsr r3 andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ @@ -1128,9 +1134,10 @@ mmu_off: mtspr SPRN_SRR1,r3 sync rfi +SYM_FUNC_END(mmu_off) /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ -initial_bats: +SYM_FUNC_START_LOCAL(initial_bats) lis r11,PAGE_OFFSET@h tophys(r8,r11) #ifdef CONFIG_SMP @@ -1146,9 +1153,10 @@ initial_bats: mtspr SPRN_IBAT0U,r11 isync blr +SYM_FUNC_END(initial_bats) #ifdef CONFIG_BOOTX_TEXT -setup_disp_bat: +SYM_FUNC_START_LOCAL(setup_disp_bat) /* * setup the display bat prepared for us in prom.c */ @@ -1164,10 +1172,11 @@ setup_disp_bat: mtspr SPRN_DBAT3L,r8 mtspr SPRN_DBAT3U,r11 blr +SYM_FUNC_END(setup_disp_bat) #endif /* CONFIG_BOOTX_TEXT */ #ifdef CONFIG_PPC_EARLY_DEBUG_CPM -setup_cpm_bat: +SYM_FUNC_START_LOCAL(setup_cpm_bat) lis r8, 0xf000 ori r8, r8, 0x002a mtspr SPRN_DBAT1L, r8 @@ -1177,10 +1186,11 @@ setup_cpm_bat: mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_cpm_bat) #endif #ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO -setup_usbgecko_bat: +SYM_FUNC_START_LOCAL(setup_usbgecko_bat) /* prepare a BAT for early io */ #if defined(CONFIG_GAMECUBE) lis r8, 0x0c00 @@ -1199,6 +1209,7 @@ setup_usbgecko_bat: mtspr SPRN_DBAT1L, r8 mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_usbgecko_bat) #endif .data diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1cb9d0f7cbf2..37d43c172676 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -84,7 +84,7 @@ END_BTB_FLUSH_SECTION stw r0,GPR0(r1) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10, r10, STACK_FRAME_REGS_MARKER@l - stw r10, 8(r1) + stw r10, STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) @@ -99,7 +99,7 @@ END_BTB_FLUSH_SECTION mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 8db1a15d7acb..e1b4e70c8fd0 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -646,7 +646,7 @@ int hw_breakpoint_handler(struct die_args *args) ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; - unsigned long ea; + unsigned long ea = 0; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc839..fccc34489add 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -77,11 +77,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r11,_TRAP(r1) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 - LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) - std r11,-16(r3) /* "regshere" marker */ BEGIN_FTR_SECTION HMT_MEDIUM @@ -96,10 +96,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * but this is the best we can do. */ + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() bl system_call_exception .Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,1 /* scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -124,6 +129,7 @@ BEGIN_FTR_SECTION HMT_MEDIUM_LOW END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_vectored_\name\()_restore_regs @@ -159,7 +165,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r4,_LINK(r1) ld r5,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() REST_GPR(0, r1) mtcr r2 mtctr r3 @@ -176,7 +182,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl syscall_exit_restart @@ -250,11 +256,11 @@ END_BTB_FLUSH_SECTION std r11,_TRAP(r1) std r12,_CCR(r1) std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ /* Calling convention has r3 = regs, r4 = orig r0 */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS mr r4,r0 - LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) - std r11,-16(r3) /* "regshere" marker */ #ifdef CONFIG_PPC_BOOK3S li r11,1 @@ -275,10 +281,15 @@ END_BTB_FLUSH_SECTION wrteei 1 #endif + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() bl system_call_exception .Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 /* !scv */ bl syscall_exit_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -315,6 +326,7 @@ BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_restore_regs /* Zero volatile regs that may contain sensitive kernel data */ @@ -342,7 +354,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .Lsyscall_restore_regs: ld r3,_CTR(r1) ld r4,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() mtctr r3 mtspr SPRN_XER,r4 REST_GPR(0, r1) @@ -357,7 +369,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl syscall_exit_restart @@ -388,7 +400,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unrecoverable_exception b . /* should not get here */ #else @@ -406,11 +418,13 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) beq interrupt_return_\srr\()_kernel interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_user_prepare +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS cmpdi r3,0 bne- .Lrestore_nvgprs_\srr .Lrestore_nvgprs_\srr\()_cont: +#endif std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Linterrupt_return_\srr\()_user_rst_start: @@ -424,6 +438,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS .Lfast_user_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr lbz r4,PACASRR_VALID(r13) @@ -493,9 +508,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b . /* prevent speculative execution */ .Linterrupt_return_\srr\()_user_rst_end: +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS .Lrestore_nvgprs_\srr\(): REST_NVGPRS(r1) b .Lrestore_nvgprs_\srr\()_cont +#endif #ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_user_restart: @@ -503,7 +520,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl interrupt_exit_user_restart @@ -518,7 +535,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr .balign IFETCH_ALIGN_BYTES interrupt_return_\srr\()_kernel: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl interrupt_exit_kernel_prepare std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ @@ -585,6 +602,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS .Lfast_kernel_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() cmpdi cr1,r3,0 #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr @@ -637,7 +655,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse * the reliable stack unwinder later on. Clear it. */ - std r0,STACK_FRAME_OVERHEAD-16(r1) + std r0,STACK_INT_FRAME_MARKER(r1) REST_GPRS(2, 5, r1) @@ -684,7 +702,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) LOAD_PACA_TOC() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) bl interrupt_exit_kernel_restart diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9ede61a5a469..c5b9ce887483 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -210,7 +210,7 @@ static __always_inline void call_do_softirq(const void *sp) PPC_LL " %%r1, 0(%%r1) ;" : // Outputs : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_softirq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", @@ -264,7 +264,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) : // Outputs "+r" (r3) : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_irq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 1a1e9995dae3..ebe4d1645ca1 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -191,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + - STACK_FRAME_OVERHEAD); + STACK_INT_FRAME_REGS); unsigned long *ptr = gdb_regs; int reg; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7a89de302609..b20ee72e873a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -157,9 +157,7 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } - preempt_disable(); prev = get_kprobe(p->addr - 1); - preempt_enable_no_resched(); /* * When prev is a ftrace-based kprobe, we don't have an insn, and it @@ -370,7 +368,7 @@ int kprobe_handler(struct pt_regs *regs) if (ret > 0) { restore_previous_kprobe(kcb); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -383,7 +381,7 @@ int kprobe_handler(struct pt_regs *regs) if (p->pre_handler && p->pre_handler(p, regs)) { /* handler changed execution path, so skip ss setup */ reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } @@ -396,7 +394,7 @@ int kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_HIT_SSDONE; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -405,7 +403,7 @@ int kprobe_handler(struct pt_regs *regs) return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } NOKPROBE_SYMBOL(kprobe_handler); @@ -491,7 +489,7 @@ int kprobe_post_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, msr @@ -530,7 +528,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e5127b19fec2..daf8f87d2372 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -382,7 +382,7 @@ EXPORT_SYMBOL(__bswapdi2) _GLOBAL(start_secondary_resume) /* Reset stack */ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r3,0 stw r3,0(r1) /* Zero the stack frame pointer */ bl start_secondary diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 36184cada00b..c39c07a4c06e 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include <linux/linkage.h> #include <linux/sys.h> #include <asm/unistd.h> #include <asm/errno.h> @@ -353,7 +354,7 @@ _GLOBAL(kexec_smp_wait) * * don't overwrite r3 here, it is live for kexec_wait above. */ -real_mode: /* assume normal blr return */ +SYM_FUNC_START_LOCAL(real_mode) /* assume normal blr return */ #ifdef CONFIG_PPC_BOOK3E_64 /* Create an identity mapping. */ b kexec_create_tlb @@ -370,6 +371,7 @@ real_mode: /* assume normal blr return */ mtspr SPRN_SRR0,r11 rfid #endif +SYM_FUNC_END(real_mode) /* * kexec_sequence(newstack, start, image, control, clear_all(), @@ -384,7 +386,7 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3) mr r1,r3 li r0,0 @@ -401,7 +403,7 @@ _GLOBAL(kexec_sequence) std r26,-48(r1) std r25,-56(r1) - stdu r1,-STACK_FRAME_OVERHEAD-64(r1) + stdu r1,-STACK_FRAME_MIN_SIZE-64(r1) /* save args into preserved regs */ mr r31,r3 /* newstack (both) */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 7e45dc98df8a..ff045644f13f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -31,6 +31,16 @@ this, and makes other things simpler. Anton? --RR. */ +bool module_elf_check_arch(Elf_Ehdr *hdr) +{ + unsigned long abi_level = hdr->e_flags & 0x3; + + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + return abi_level == 2; + else + return abi_level < 2; +} + #ifdef CONFIG_PPC64_ELF_ABI_V2 static func_desc_t func_desc(unsigned long addr) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 3b1c2236cbee..004fae2044a3 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -112,7 +112,7 @@ static void optimized_callback(struct optimized_kprobe *op, __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + preempt_enable(); } NOKPROBE_SYMBOL(optimized_callback); diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cd4e7bc32609..35932f45fb4e 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -85,7 +85,7 @@ optprobe_template_op_address: TEMPLATE_FOR_IMM_LOAD_INSNS /* 2. pt_regs pointer in r4 */ - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS .global optprobe_template_call_handler optprobe_template_call_handler: @@ -96,7 +96,7 @@ optprobe_template_call_handler: * Parameters for instruction emulation: * 1. Pass SP in register r3. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .global optprobe_template_insn optprobe_template_insn: diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 2d4d21bb46a9..49813f982468 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -21,60 +21,33 @@ * different ABIs, though). */ _GLOBAL(ppc_save_regs) - PPC_STL r0,0*SZL(r3) + /* This allows stack frame accessor macros and offsets to be used */ + subi r3,r3,STACK_INT_FRAME_REGS + PPC_STL r0,GPR0(r3) #ifdef CONFIG_PPC32 - stmw r2, 2*SZL(r3) + stmw r2,GPR2(r3) #else - PPC_STL r2,2*SZL(r3) - PPC_STL r3,3*SZL(r3) - PPC_STL r4,4*SZL(r3) - PPC_STL r5,5*SZL(r3) - PPC_STL r6,6*SZL(r3) - PPC_STL r7,7*SZL(r3) - PPC_STL r8,8*SZL(r3) - PPC_STL r9,9*SZL(r3) - PPC_STL r10,10*SZL(r3) - PPC_STL r11,11*SZL(r3) - PPC_STL r12,12*SZL(r3) - PPC_STL r13,13*SZL(r3) - PPC_STL r14,14*SZL(r3) - PPC_STL r15,15*SZL(r3) - PPC_STL r16,16*SZL(r3) - PPC_STL r17,17*SZL(r3) - PPC_STL r18,18*SZL(r3) - PPC_STL r19,19*SZL(r3) - PPC_STL r20,20*SZL(r3) - PPC_STL r21,21*SZL(r3) - PPC_STL r22,22*SZL(r3) - PPC_STL r23,23*SZL(r3) - PPC_STL r24,24*SZL(r3) - PPC_STL r25,25*SZL(r3) - PPC_STL r26,26*SZL(r3) - PPC_STL r27,27*SZL(r3) - PPC_STL r28,28*SZL(r3) - PPC_STL r29,29*SZL(r3) - PPC_STL r30,30*SZL(r3) - PPC_STL r31,31*SZL(r3) + SAVE_GPRS(2, 31, r3) lbz r0,PACAIRQSOFTMASK(r13) - PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,SOFTE(r3) #endif /* go up one stack frame for SP */ PPC_LL r4,0(r1) - PPC_STL r4,1*SZL(r3) + PPC_STL r4,GPR1(r3) /* get caller's LR */ PPC_LL r0,LRSAVE(r4) - PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_LINK(r3) mflr r0 - PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_NIP(r3) mfmsr r0 - PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_MSR(r3) mfctr r0 - PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CTR(r3) mfxer r0 - PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_XER(r3) mfcr r0 - PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CCR(r3) li r0,0 - PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) - PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_TRAP(r3) + PPC_STL r0,ORIG_GPR3(r3) blr diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index fcf604370c66..c22cc234672f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) return 0; } -void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); - if (dawr_enabled()) // Power8 or later set_dawr(nr, brk); @@ -879,6 +877,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) WARN_ON_ONCE(1); } +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +{ + memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); + set_hw_breakpoint(nr, brk); +} + /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { @@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); +/* Disable the breakpoint in hardware without touching current_brk[] */ +void suspend_breakpoints(void) +{ + struct arch_hw_breakpoint brk = {0}; + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, &brk); +} + +/* + * Re-enable breakpoints suspended by suspend_breakpoints() in hardware + * from current_brk[] + */ +void restore_breakpoints(void) +{ + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, this_cpu_ptr(¤t_brk[i])); +} + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) @@ -1359,7 +1391,7 @@ static void show_instructions(struct pt_regs *regs) unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - printk("Instruction dump:"); + printk("Code: "); /* * If we were executing with the MMU off for instructions, adjust pc @@ -1373,9 +1405,6 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!(i % 8)) - pr_cont("\n"); - if (!__kernel_text_address(pc) || get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); @@ -1726,13 +1755,17 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) klp_init_thread_info(p); + /* Create initial stack frame. */ + sp -= STACK_USER_INT_FRAME_SIZE; + *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; + /* Copy registers */ - sp -= sizeof(struct pt_regs); - childregs = (struct pt_regs *) sp; + childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); if (unlikely(args->fn)) { /* kernel thread */ + ((unsigned long *)sp)[0] = 0; memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + sizeof(struct pt_regs); + childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; /* function */ if (args->fn) childregs->gpr[14] = ppc_function_entry((void *)args->fn); @@ -1750,6 +1783,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) *childregs = *regs; if (usp) childregs->gpr[1] = usp; + ((unsigned long *)sp)[0] = childregs->gpr[1]; p->thread.regs = childregs; /* 64s sets this in ret_from_fork */ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) @@ -1767,7 +1801,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); - sp -= STACK_FRAME_OVERHEAD; /* * The way this works is that at some point in the future @@ -1777,11 +1810,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ - ((unsigned long *)sp)[0] = 0; - sp -= sizeof(struct pt_regs); - kregs = (struct pt_regs *) sp; - sp -= STACK_FRAME_OVERHEAD; + ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f; + sp -= STACK_SWITCH_FRAME_SIZE; + ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE; + kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); p->thread.ksp = sp; + #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; @@ -2123,9 +2157,12 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, return 0; } - -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes) +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. + */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes) { unsigned long stack_page = (unsigned long)task_stack_page(p); @@ -2141,7 +2178,10 @@ int validate_sp(unsigned long sp, struct task_struct *p, return valid_emergency_stack(sp, p, nbytes); } -EXPORT_SYMBOL(validate_sp); +int validate_sp(unsigned long sp, struct task_struct *p) +{ + return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE); +} static unsigned long ___get_wchan(struct task_struct *p) { @@ -2149,13 +2189,12 @@ static unsigned long ___get_wchan(struct task_struct *p) int count = 0; sp = p->thread.ksp; - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, p)) return 0; do { sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || - task_is_running(p)) + if (!validate_sp(sp, p) || task_is_running(p)) return 0; if (count > 0) { ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); @@ -2209,7 +2248,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, lr = 0; printk("%sCall Trace:\n", loglvl); do { - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, tsk)) break; stack = (unsigned long *) sp; @@ -2230,12 +2269,16 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, /* * See if this is an exception frame. - * We look for the "regshere" marker in the current frame. + * We look for the "regs" marker in the current frame. + * + * STACK_SWITCH_FRAME_SIZE being the smallest frame that + * could hold a pt_regs, if that does not fit then it can't + * have regs. */ - if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) - && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE) + && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) - (sp + STACK_FRAME_OVERHEAD); + (sp + STACK_INT_FRAME_REGS); lr = regs->link; printk("%s--- interrupt: %lx at %pS\n", diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1eed87d954ba..4f1c920aa13e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -72,6 +72,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; +unsigned int boot_cpu_node_count __ro_after_init; #endif static phys_addr_t first_memblock_size; static int __initdata boot_cpu_count; @@ -335,6 +336,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_node_count++; + /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); if (!intserv) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index e847f9b1c5b9..deded51a7978 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,43 +7,35 @@ * Copyright (C) 2001 IBM. */ -#include <linux/stdarg.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/init.h> +#define pr_fmt(fmt) "rtas: " fmt + #include <linux/capability.h> #include <linux/delay.h> -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/completion.h> -#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/kernel.h> #include <linux/memblock.h> -#include <linux/slab.h> +#include <linux/of.h> +#include <linux/of_fdt.h> #include <linux/reboot.h> +#include <linux/sched.h> #include <linux/security.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stdarg.h> #include <linux/syscalls.h> -#include <linux/of.h> -#include <linux/of_fdt.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <asm/delay.h> +#include <asm/firmware.h> #include <asm/interrupt.h> -#include <asm/rtas.h> -#include <asm/hvcall.h> #include <asm/machdep.h> -#include <asm/firmware.h> +#include <asm/mmu.h> #include <asm/page.h> -#include <asm/param.h> -#include <asm/delay.h> -#include <linux/uaccess.h> -#include <asm/udbg.h> -#include <asm/syscalls.h> -#include <asm/smp.h> -#include <linux/atomic.h> +#include <asm/rtas.h> #include <asm/time.h> -#include <asm/mmu.h> -#include <asm/topology.h> +#include <asm/udbg.h> /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -353,6 +345,9 @@ int rtas_service_present(const char *service) EXPORT_SYMBOL(rtas_service_present); #ifdef CONFIG_RTAS_ERROR_LOGGING + +static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; + /* * Return the firmware-specified size of the error log buffer * for all rtas calls that require an error buffer argument. @@ -360,21 +355,30 @@ EXPORT_SYMBOL(rtas_service_present); */ int rtas_get_error_log_max(void) { - static int rtas_error_log_max; - if (rtas_error_log_max) - return rtas_error_log_max; - - rtas_error_log_max = rtas_token ("rtas-error-log-max"); - if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) || - (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) { - printk (KERN_WARNING "RTAS: bad log buffer size %d\n", - rtas_error_log_max); - rtas_error_log_max = RTAS_ERROR_LOG_MAX; - } return rtas_error_log_max; } EXPORT_SYMBOL(rtas_get_error_log_max); +static void __init init_error_log_max(void) +{ + static const char propname[] __initconst = "rtas-error-log-max"; + u32 max; + + if (of_property_read_u32(rtas.dev, propname, &max)) { + pr_warn("%s not found, using default of %u\n", + propname, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + if (max > RTAS_ERROR_LOG_MAX) { + pr_warn("%s = %u, clamping max error log size to %u\n", + propname, max, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + rtas_error_log_max = max; +} + static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; static int rtas_last_error_token; @@ -432,6 +436,7 @@ static char *__fetch_rtas_last_error(char *altbuf) #else /* CONFIG_RTAS_ERROR_LOGGING */ #define __fetch_rtas_last_error(x) NULL #define get_errorlog_buffer() NULL +static void __init init_error_log_max(void) {} #endif @@ -467,6 +472,64 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, static int ibm_open_errinjct_token; static int ibm_errinjct_token; +/** + * rtas_call() - Invoke an RTAS firmware function. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @outputs: Array of @nret output words. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_token(). + * + * The @nargs and @nret arguments must match the number of input and + * output parameters specified for the RTAS function. + * + * rtas_call() returns RTAS status codes, not conventional Linux errno + * values. Callers must translate any failure to an appropriate errno + * in syscall context. Most callers of RTAS functions that can return + * -2 or 990x should use rtas_busy_delay() to correctly handle those + * statuses before calling again. + * + * The return value descriptions are adapted from 7.2.8 [RTAS] Return + * Codes of the PAPR and CHRP specifications. + * + * Context: Process context preferably, interrupt context if + * necessary. Acquires an internal spinlock and may perform + * GFP_ATOMIC slab allocation in error path. Unsafe for NMI + * context. + * Return: + * * 0 - RTAS function call succeeded. + * * -1 - RTAS function encountered a hardware or + * platform error, or the token is invalid, + * or the function is restricted by kernel policy. + * * -2 - Specs say "A necessary hardware device was busy, + * and the requested function could not be + * performed. The operation should be retried at + * a later time." This is misleading, at least with + * respect to current RTAS implementations. What it + * usually means in practice is that the function + * could not be completed while meeting RTAS's + * deadline for returning control to the OS (250us + * for PAPR/PowerVM, typically), but the call may be + * immediately reattempted to resume work on it. + * * -3 - Parameter error. + * * -7 - Unexpected state change. + * * 9000...9899 - Vendor-specific success codes. + * * 9900...9905 - Advisory extended delay. Caller should try + * again after ~10^x ms has elapsed, where x is + * the last digit of the status [0-5]. Again going + * beyond the PAPR text, 990x on PowerVM indicates + * contention for RTAS-internal resources. Other + * RTAS call sequences in progress should be + * allowed to complete before reattempting the + * call. + * * -9000 - Multi-level isolation error. + * * -9999...-9004 - Vendor-specific error codes. + * * Additional negative values - Function-specific error. + * * Additional positive values - Function-specific success. + */ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; @@ -657,8 +720,7 @@ static int rtas_error_rc(int rtas_rc) rc = -ENODEV; break; default: - printk(KERN_ERR "%s: unexpected RTAS error %d\n", - __func__, rtas_rc); + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); rc = -ERANGE; break; } @@ -862,8 +924,8 @@ void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); - printk("RTAS system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + pr_emerg("system-reboot returned %d\n", + rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); for (;;); } @@ -872,8 +934,8 @@ void rtas_power_off(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); for (;;); } @@ -882,13 +944,14 @@ void __noreturn rtas_halt(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); for (;;); } /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; +static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; void rtas_os_term(char *str) { @@ -900,19 +963,23 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || - RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) + if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + /* + * Keep calling as long as RTAS returns a "try again" status, + * but don't use rtas_busy_delay(), which potentially + * schedules. + */ do { - status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, + status = rtas_call(ibm_os_term_token, 1, 1, NULL, __pa(rtas_os_term_buf)); - } while (rtas_busy_delay(status)); + } while (rtas_busy_delay_time(status)); if (status != 0) - printk(KERN_EMERG "ibm,os-term call failed %d\n", status); + pr_emerg("ibm,os-term call failed %d\n", status); } /** @@ -983,8 +1050,6 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log return NULL; } -#ifdef CONFIG_PPC_RTAS_FILTER - /* * The sys_rtas syscall, as originally designed, allows root to pass * arbitrary physical addresses to RTAS calls. A number of RTAS calls @@ -1133,20 +1198,6 @@ static void __init rtas_syscall_filter_init(void) rtas_filters[i].token = rtas_token(rtas_filters[i].name); } -#else - -static bool block_rtas_call(int token, int nargs, - struct rtas_args *args) -{ - return false; -} - -static void __init rtas_syscall_filter_init(void) -{ -} - -#endif /* CONFIG_PPC_RTAS_FILTER */ - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1277,6 +1328,15 @@ void __init rtas_initialize(void) no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); rtas.entry = no_entry ? rtas.base : entry; + init_error_log_max(); + + /* + * Discover these now to avoid device tree lookups in the + * panic path. + */ + if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) + ibm_os_term_token = rtas_token("ibm,os-term"); + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 5270b450bbde..cc56ac6ba4b0 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/init.h> @@ -499,6 +500,8 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); static int __init rtas_event_scan_init(void) { + int err; + if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -509,8 +512,8 @@ static int __init rtas_event_scan_init(void) return -ENODEV; } - rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); - if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate); + if (err) { printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); return -ENODEV; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0da6e59161cd..6b90f10a6c81 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1249,7 +1249,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_PPC64 paca_ptrs[cpu]->__current = idle; paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) + - THREAD_SIZE - STACK_FRAME_OVERHEAD; + THREAD_SIZE - STACK_FRAME_MIN_SIZE; #endif task_thread_info(idle)->cpu = cpu; secondary_current = current_set[cpu] = idle; diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index a2443d61728e..5de8597eaab8 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -43,7 +43,7 @@ void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task)) return; newsp = stack[0]; @@ -77,7 +77,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* * For user tasks, this is the SP value loaded on * kernel entry, see "PACAKSAVE(r13)" in _switch() and - * system_call_common()/EXCEPTION_PROLOG_COMMON(). + * system_call_common(). * * Likewise for non-swapper kernel threads, * this also happens to be the top of the stack @@ -88,13 +88,13 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * an unreliable stack trace until it's been * _switch()'ed to for the first time. */ - stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + stack_end -= STACK_USER_INT_FRAME_SIZE; } else { /* * idle tasks have a custom stack layout, * c.f. cpu_idle_thread_init(). */ - stack_end -= STACK_FRAME_OVERHEAD; + stack_end -= STACK_FRAME_MIN_SIZE; } if (task == current) @@ -136,7 +136,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* Mark stacktraces with exception frames as unreliable. */ if (sp <= stack_end - STACK_INT_FRAME_SIZE && - stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { return -EINVAL; } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index e0cbd63007f2..ffb79326483c 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/threads.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/cputable.h> @@ -400,7 +402,7 @@ _ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. */ -turn_on_mmu: +SYM_FUNC_START_LOCAL(turn_on_mmu) mflr r4 mtsrr0 r4 mtsrr1 r3 @@ -408,4 +410,5 @@ turn_on_mmu: isync rfi _ASM_NOKPROBE_SYMBOL(turn_on_mmu) +SYM_FUNC_END(turn_on_mmu) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a2ab397065c6..d68de3618741 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy; unsigned long tb_ticks_per_usec = 100; /* sane default */ EXPORT_SYMBOL(tb_ticks_per_usec); unsigned long tb_ticks_per_sec; -EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ +EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); @@ -151,21 +151,6 @@ bool tb_invalid; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* - * Factor for converting from cputime_t (timebase ticks) to - * microseconds. This is stored as 0.64 fixed-point binary fraction. - */ -u64 __cputime_usec_factor; -EXPORT_SYMBOL(__cputime_usec_factor); - -static void calc_cputime_factors(void) -{ - struct div_result res; - - div128_by_32(1000000, 0, tb_ticks_per_sec, &res); - __cputime_usec_factor = res.result_low; -} - -/* * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. */ @@ -369,10 +354,7 @@ void vtime_flush(struct task_struct *tsk) acct->hardirq_time = 0; acct->softirq_time = 0; } - -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#define calc_cputime_factors() -#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ void __delay(unsigned long loops) { @@ -914,7 +896,6 @@ void __init time_init(void) tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; - calc_cputime_factors(); /* * Compute scale factor for sched_clock. diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 5a0f023a26e9..9feab5e0485b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -117,7 +117,7 @@ _GLOBAL(tm_reclaim) std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) - /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ + /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */ std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) @@ -222,7 +222,7 @@ _GLOBAL(tm_reclaim) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ @@ -359,7 +359,7 @@ _GLOBAL(__tm_recheckpoint) stdu r1, -TM_FRAME_SIZE(r1) /* - * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. * This is used for backing up the NVGPRs: */ SAVE_NVGPRS(r1) @@ -379,7 +379,7 @@ _GLOBAL(__tm_recheckpoint) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is now NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* We need to setup MSR for FP/VMX/VSX register save instructions. */ mfmsr r6 diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index d031093bc436..ffb1db386849 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -110,7 +110,7 @@ .endif /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD + addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index a2e7b0ce5b19..6a977b0d8ffc 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -102,3 +102,5 @@ quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< + +OBJECT_FILES_NON_STANDARD := y diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 5cf64740edb8..ffe5d90abe17 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/reg.h> @@ -185,7 +186,7 @@ fphalf: * Internal routine to enable floating point and set FPSCR to 0. * Don't call it from C; it doesn't use the normal calling convention. */ -fpenable: +SYM_FUNC_START_LOCAL(fpenable) #ifdef CONFIG_PPC32 stwu r1,-64(r1) #else @@ -202,6 +203,7 @@ fpenable: mffs fr31 MTFSF_L(fr1) blr +SYM_FUNC_END(fpenable) fpdisable: mtlr r12 diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 60e12b716d3c..af8854f9eae3 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -26,6 +26,7 @@ #include <asm/firmware.h> #include <asm/kexec_ranges.h> #include <asm/crashdump-ppc64.h> +#include <asm/prom.h> struct umem_info { u64 *buf; /* data buffer for usable-memory property */ @@ -929,6 +930,45 @@ out: } /** + * get_cpu_node_size - Compute the size of a CPU node in the FDT. + * This should be done only once and the value is stored in + * a static variable. + * Returns the max size of a CPU node in the FDT. + */ +static unsigned int cpu_node_size(void) +{ + static unsigned int size; + struct device_node *dn; + struct property *pp; + + /* + * Don't compute it twice, we are assuming that the per CPU node size + * doesn't change during the system's life. + */ + if (size) + return size; + + dn = of_find_node_by_type(NULL, "cpu"); + if (WARN_ON_ONCE(!dn)) { + // Unlikely to happen + return 0; + } + + /* + * We compute the sub node size for a CPU node, assuming it + * will be the same for all. + */ + size += strlen(dn->name) + 5; + for_each_property_of_node(dn, pp) { + size += strlen(pp->name); + size += pp->length; + } + + of_node_put(dn); + return size; +} + +/** * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to * setup FDT for kexec/kdump kernel. * @image: kexec image being loaded. @@ -937,6 +977,8 @@ out: */ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) { + unsigned int cpu_nodes, extra_size; + struct device_node *dn; u64 usm_entries; if (image->type != KEXEC_TYPE_CRASH) @@ -949,7 +991,22 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) */ usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); - return (unsigned int)(usm_entries * sizeof(u64)); + + extra_size = (unsigned int)(usm_entries * sizeof(u64)); + + /* + * Get the number of CPU nodes in the current DT. This allows to + * reserve places for CPU nodes added since the boot time. + */ + cpu_nodes = 0; + for_each_node_by_type(dn, "cpu") { + cpu_nodes++; + } + + if (cpu_nodes > boot_cpu_node_count) + extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size(); + + return extra_size; } /** diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 4939f57b6f6a..7006bcbc2e37 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1202,7 +1202,7 @@ static int resize_hpt_allocate(struct kvm_resize_hpt *resize) if (rc < 0) return rc; - resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n", + resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__, resize->hpt.virt); return 0; @@ -1443,7 +1443,7 @@ static void resize_hpt_prepare_work(struct work_struct *work) */ mutex_unlock(&kvm->arch.mmu_setup_lock); - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize_hpt_debug(resize, "%s(): order = %d\n", __func__, resize->order); err = resize_hpt_allocate(resize); @@ -1887,8 +1887,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, tmp); if (ret != H_SUCCESS) { - pr_err("kvm_htab_write ret %ld i=%ld v=%lx " - "r=%lx\n", ret, i, v, r); + pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r); goto out; } if (!mmu_ready && is_vrma_hpte(v)) { diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 40864373ef87..95e738ef9062 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,14 +294,14 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *siter; struct mm_struct *mm = kvm->mm; - unsigned long npages, size = args->size; + unsigned long npages; int ret; if (!args->size || args->page_shift < 12 || args->page_shift > 34 || (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) return -EINVAL; - npages = kvmppc_tce_pages(size); + npages = kvmppc_tce_pages(args->size); ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -314,7 +314,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->liobn = args->liobn; stt->page_shift = args->page_shift; stt->offset = args->offset; - stt->size = size; + stt->size = args->size; stt->kvm = kvm; mutex_init(&stt->alloc_lock); INIT_LIST_HEAD_RCU(&stt->iommu_tables); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 59d89e4b154a..c0deeea7eef3 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -9,6 +9,7 @@ * Authors: Alexander Graf <agraf@suse.de> */ +#include <linux/linkage.h> #include <asm/ppc_asm.h> #include <asm/kvm_asm.h> #include <asm/reg.h> @@ -107,7 +108,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* * void kvmhv_save_host_pmu(void) */ -kvmhv_save_host_pmu: +SYM_FUNC_START_LOCAL(kvmhv_save_host_pmu) BEGIN_FTR_SECTION /* Work around P8 PMAE bug */ li r3, -1 @@ -154,3 +155,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stw r8, HSTATE_PMC5(r13) stw r9, HSTATE_PMC6(r13) 31: blr +SYM_FUNC_END(kvmhv_save_host_pmu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 37f50861dd98..acf80915f406 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -10,6 +10,8 @@ * Authors: Alexander Graf <agraf@suse.de> */ +#include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/ppc_asm.h> #include <asm/code-patching-asm.h> #include <asm/kvm_asm.h> @@ -1522,12 +1524,14 @@ kvm_flush_link_stack: /* Flush the link stack. On Power8 it's up to 32 entries in size. */ .rept 32 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr /* And on Power9 it's up to 64. */ BEGIN_FTR_SECTION .rept 32 + ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -2358,7 +2362,7 @@ hmi_realmode: * This routine calls kvmppc_read_intr, a C function, if an external * interrupt is pending. */ -kvmppc_check_wake_reason: +SYM_FUNC_START_LOCAL(kvmppc_check_wake_reason) mfspr r6, SPRN_SRR1 BEGIN_FTR_SECTION rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */ @@ -2427,6 +2431,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) addi r1, r1, PPC_MIN_STKFRM mtlr r0 blr +SYM_FUNC_END(kvmppc_check_wake_reason) /* * Save away FP, VMX and VSX registers. @@ -2434,7 +2439,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * N.B. r30 and r31 are volatile across this function, * thus it is not callable from C. */ -kvmppc_save_fp: +SYM_FUNC_START_LOCAL(kvmppc_save_fp) mflr r30 mr r31,r3 mfmsr r5 @@ -2462,6 +2467,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) stw r6,VCPU_VRSAVE(r31) mtlr r30 blr +SYM_FUNC_END(kvmppc_save_fp) /* * Load up FP, VMX and VSX registers @@ -2469,7 +2475,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * N.B. r30 and r31 are volatile across this function, * thus it is not callable from C. */ -kvmppc_load_fp: +SYM_FUNC_START_LOCAL(kvmppc_load_fp) mflr r30 mr r31,r4 mfmsr r9 @@ -2498,6 +2504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtlr r30 mr r4,r31 blr +SYM_FUNC_END(kvmppc_load_fp) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* @@ -2729,7 +2736,7 @@ kvmppc_bad_host_intr: std r6, SOFTE(r1) LOAD_PACA_TOC() LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER) - std r3, STACK_FRAME_OVERHEAD-16(r1) + std r3, STACK_INT_FRAME_MARKER(r1) /* * XXX On POWER7 and POWER8, we just spin here since we don't @@ -2746,7 +2753,7 @@ kvmppc_bad_host_intr: * r9 has a vcpu pointer (in) * r0 is used as a scratch register */ -kvmppc_msr_interrupt: +SYM_FUNC_START_LOCAL(kvmppc_msr_interrupt) rldicl r0, r11, 64 - MSR_TS_S_LG, 62 cmpwi r0, 2 /* Check if we are in transactional state.. */ ld r11, VCPU_INTR_MSR(r9) @@ -2755,13 +2762,14 @@ kvmppc_msr_interrupt: li r0, 1 1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG blr +SYM_FUNC_END(kvmppc_msr_interrupt) /* * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu) * * Load up guest PMU state. R3 points to the vcpu struct. */ -kvmhv_load_guest_pmu: +SYM_FUNC_START_LOCAL(kvmhv_load_guest_pmu) mr r4, r3 mflr r0 li r3, 1 @@ -2811,13 +2819,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync mtlr r0 blr +SYM_FUNC_END(kvmhv_load_guest_pmu) /* * void kvmhv_load_host_pmu(void) * * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu. */ -kvmhv_load_host_pmu: +SYM_FUNC_START_LOCAL(kvmhv_load_host_pmu) mflr r0 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r4, 0 @@ -2859,6 +2868,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) isync mtlr r0 23: blr +SYM_FUNC_END(kvmhv_load_host_pmu) /* * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use) @@ -2866,7 +2876,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Save guest PMU state into the vcpu struct. * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA) */ -kvmhv_save_guest_pmu: +SYM_FUNC_START_LOCAL(kvmhv_save_guest_pmu) mr r9, r3 mr r8, r4 BEGIN_FTR_SECTION @@ -2942,6 +2952,7 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCRS, r4 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: blr +SYM_FUNC_END(kvmhv_save_guest_pmu) /* * This works around a hardware bug on POWER8E processors, where diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index e2f11f9c3f2a..1d67baa5557a 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -1190,8 +1190,7 @@ int kvmppc_uvmem_init(void) pfn_first = res->start >> PAGE_SHIFT; pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT); - kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first), - sizeof(unsigned long), GFP_KERNEL); + kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL); if (!kvmppc_uvmem_bitmap) { ret = -ENOMEM; goto out_unmap; @@ -1215,5 +1214,5 @@ void kvmppc_uvmem_free(void) memunmap_pages(&kvmppc_uvmem_pgmap); release_mem_region(kvmppc_uvmem_pgmap.range.start, range_len(&kvmppc_uvmem_pgmap.range)); - kfree(kvmppc_uvmem_bitmap); + bitmap_free(kvmppc_uvmem_bitmap); } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 4ca23644f752..f4115819e738 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -539,7 +539,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) if (irq == XICS_IPI || irq == 0) { /* * This barrier orders the setting of xc->cppr vs. - * subsquent test of xc->mfrr done inside + * subsequent test of xc->mfrr done inside * scan_interrupts and push_pending_to_hw */ smp_mb(); @@ -563,7 +563,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) /* * This barrier orders both setting of in_eoi above vs, * subsequent test of guest_priority, and the setting - * of xc->cppr vs. subsquent test of xc->mfrr done inside + * of xc->cppr vs. subsequent test of xc->mfrr done inside * scan_interrupts and push_pending_to_hw */ smp_mb(); @@ -1785,8 +1785,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) * stale_p (because it has no easy way to address it). Hence we have * to adjust stale_p before shutting down the interrupt. */ -void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, - struct kvmppc_xive_vcpu *xc, int irq) +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { struct irq_data *d = irq_get_irq_data(irq); struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -1827,8 +1826,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { if (xc->esc_virq[i]) { if (kvmppc_xive_has_single_escalation(xc->xive)) - xive_cleanup_single_escalation(vcpu, xc, - xc->esc_virq[i]); + xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); @@ -2392,7 +2390,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Now, we select a target if we have one. If we don't we * leave the interrupt untargetted. It means that an interrupt - * can become "untargetted" accross migration if it was masked + * can become "untargetted" across migration if it was masked * by set_xive() but there is little we can do about it. */ diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 1e48f72e8aa5..62bf39f53783 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -299,8 +299,7 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, bool single_escalation); struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); -void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, - struct kvmppc_xive_vcpu *xc, int irq); +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5271c33fe79e..4f566bea5e10 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -93,8 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Free the escalation irq */ if (xc->esc_virq[i]) { if (kvmppc_xive_has_single_escalation(xc->xive)) - xive_cleanup_single_escalation(vcpu, xc, - xc->esc_virq[i]); + xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7b4920e9fd26..0dce93ccaadf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1015,6 +1015,9 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) u32 last_inst = KVM_INST_FETCH_FAILED; enum emulation_result emulated = EMULATE_DONE; + /* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */ + kvmppc_fix_ee_after_exit(); + /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 8262c14fc9e6..b5fe6fb53c66 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -424,15 +424,6 @@ _GLOBAL(kvmppc_resume_host) mtspr SPRN_EPCR, r3 isync -#ifdef CONFIG_64BIT - /* - * We enter with interrupts disabled in hardware, but - * we need to call RECONCILE_IRQ_STATE to ensure - * that the software state is kept in sync. - */ - RECONCILE_IRQ_STATE(r3,r5) -#endif - /* Switch to kernel stack and jump to handler. */ mr r3, r4 mr r5, r14 /* intno */ diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 315c94946bad..b68e7f26a81f 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -6,6 +6,8 @@ */ #include <linux/pgtable.h> +#include <linux/linkage.h> + #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -110,18 +112,22 @@ FPS_THREE_IN(fsel) * R8 = (double*)¶m3 [load_three] * LR = instruction call function */ -fpd_load_three: +SYM_FUNC_START_LOCAL(fpd_load_three) lfd 2,0(r8) /* load param3 */ -fpd_load_two: +SYM_FUNC_START_LOCAL(fpd_load_two) lfd 1,0(r7) /* load param2 */ -fpd_load_one: +SYM_FUNC_START_LOCAL(fpd_load_one) lfd 0,0(r6) /* load param1 */ -fpd_load_none: +SYM_FUNC_START_LOCAL(fpd_load_none) lfd 3,0(r3) /* load up fpscr value */ MTFSF_L(3) lwz r6, 0(r4) /* load cr */ mtcr r6 blr +SYM_FUNC_END(fpd_load_none) +SYM_FUNC_END(fpd_load_one) +SYM_FUNC_END(fpd_load_two) +SYM_FUNC_END(fpd_load_three) /* * End of double instruction processing @@ -131,13 +137,14 @@ fpd_load_none: * R5 = (double*)&result * LR = caller of instruction call function */ -fpd_return: +SYM_FUNC_START_LOCAL(fpd_return) mfcr r6 stfd 0,0(r5) /* save result */ mffs 0 stfd 0,0(r3) /* save new fpscr value */ stw r6,0(r4) /* save new cr value */ blr +SYM_FUNC_END(fpd_return) /* * Double operation with no input operand diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8560c912186d..4de71cbf6e8e 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o copy_mc_64.o -ifndef CONFIG_PPC_QUEUED_SPINLOCKS +ifdef CONFIG_PPC_QUEUED_SPINLOCKS +obj-$(CONFIG_SMP) += qspinlock.o +else obj64-$(CONFIG_SMP) += locks.o endif diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index ad0cf3108dd0..b00112d7ad46 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -4,12 +4,17 @@ */ #include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/random.h> #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/cpuhotplug.h> #include <linux/uaccess.h> #include <linux/jump_label.h> +#include <asm/debug.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/page.h> #include <asm/code-patching.h> @@ -41,12 +46,59 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) return __patch_instruction(addr, instr, addr); } -#ifdef CONFIG_STRICT_KERNEL_RWX -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +struct patch_context { + union { + struct vm_struct *area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t *pte; +}; + +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. + * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. + * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller to perform the CSI and hwsync before + * starting/stopping the temp mm. + */ +static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) +{ + struct mm_struct *orig_mm = current->active_mm; + + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(orig_mm, temp_mm, current); + + WARN_ON(!mm_is_thread_local(temp_mm)); + + suspend_breakpoints(); + return orig_mm; +} + +static void stop_using_temp_mm(struct mm_struct *temp_mm, + struct mm_struct *orig_mm) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(temp_mm, orig_mm, current); + restore_breakpoints(); +} + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; @@ -68,29 +120,108 @@ static int text_area_cpu_up(unsigned int cpu) unmap_patch_area(addr); - this_cpu_write(text_poke_area, area); + this_cpu_write(cpu_patching_context.area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); return 0; } static int text_area_cpu_down(unsigned int cpu) { - free_vm_area(this_cpu_read(text_poke_area)); + free_vm_area(this_cpu_read(cpu_patching_context.area)); + this_cpu_write(cpu_patching_context.area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); + return 0; +} + +static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) +{ + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm); + free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); + mmput(mm); +} + +static int text_area_cpu_up_mm(unsigned int cpu) +{ + struct mm_struct *mm; + unsigned long addr; + pte_t *pte; + spinlock_t *ptl; + + mm = mm_alloc(); + if (WARN_ON(!mm)) + goto fail_no_mm; + + /* + * Choose a random page-aligned address from the interval + * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. + * The lower address bound is PAGE_SIZE to avoid the zero-page. + */ + addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; + + /* + * PTE allocation uses GFP_KERNEL which means we need to + * pre-allocate the PTE here because we cannot do the + * allocation during patching when IRQs are disabled. + * + * Using get_locked_pte() to avoid open coding, the lock + * is unnecessary. + */ + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto fail_no_pte; + pte_unmap_unlock(pte, ptl); + + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); + + return 0; + +fail_no_pte: + put_patching_mm(mm, addr); +fail_no_mm: + return -ENOMEM; +} + +static int text_area_cpu_down_mm(unsigned int cpu) +{ + put_patching_mm(this_cpu_read(cpu_patching_context.mm), + this_cpu_read(cpu_patching_context.addr)); + + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + return 0; } static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); -/* - * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and - * we judge it as being preferable to a kernel that will crash later when - * someone tries to use patch_instruction(). - */ void __init poking_init(void) { - BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", text_area_cpu_up, - text_area_cpu_down)); + int ret; + + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + return; + + if (mm_patch_enabled()) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke_mm:online", + text_area_cpu_up_mm, + text_area_cpu_down_mm); + else + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down); + + /* cpuhp_setup_state returns >= 0 on success */ + if (WARN_ON(ret < 0)) + return; + static_branch_enable(&poking_init_done); } @@ -147,6 +278,56 @@ static void unmap_patch_area(unsigned long addr) flush_tlb_kernel_range(addr, addr + PAGE_SIZE); } +static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + pte_t *pte; + unsigned long pfn = get_patch_pfn(addr); + struct mm_struct *patching_mm; + struct mm_struct *orig_mm; + spinlock_t *ptl; + + patching_mm = __this_cpu_read(cpu_patching_context.mm); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); + if (!pte) + return -ENOMEM; + + __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); + + /* order PTE update before use, also serves as the hwsync */ + asm volatile("ptesync": : :"memory"); + + /* order context switch after arbitrary prior code */ + isync(); + + orig_mm = start_using_temp_mm(patching_mm); + + err = __patch_instruction(addr, instr, patch_addr); + + /* hwsync performed by __patch_instruction (sync) if successful */ + if (err) + mb(); /* sync */ + + /* context synchronisation performed by __patch_instruction (isync or exception) */ + stop_using_temp_mm(patching_mm, orig_mm); + + pte_clear(patching_mm, text_poke_addr, pte); + /* + * ptesync to order PTE update before TLB invalidation done + * by radix__local_flush_tlb_page_psize (in _tlbiel_va) + */ + local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); + + pte_unmap_unlock(pte, ptl); + + return err; +} + static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; @@ -155,10 +336,10 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) pte_t *pte; unsigned long pfn = get_patch_pfn(addr); - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); - pte = virt_to_kpte(text_poke_addr); + pte = __this_cpu_read(cpu_patching_context.pte); __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) @@ -172,7 +353,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) return err; } -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) +int patch_instruction(u32 *addr, ppc_inst_t instr) { int err; unsigned long flags; @@ -182,34 +363,19 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!static_branch_likely(&poking_init_done)) + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || + !static_branch_likely(&poking_init_done)) return raw_patch_instruction(addr, instr); local_irq_save(flags); - err = __do_patch_instruction(addr, instr); + if (mm_patch_enabled()) + err = __do_patch_instruction_mm(addr, instr); + else + err = __do_patch_instruction(addr, instr); local_irq_restore(flags); return err; } -#else /* !CONFIG_STRICT_KERNEL_RWX */ - -static int do_patch_instruction(u32 *addr, ppc_inst_t instr) -{ - return raw_patch_instruction(addr, instr); -} - -#endif /* CONFIG_STRICT_KERNEL_RWX */ - -__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free); - -int patch_instruction(u32 *addr, ppc_inst_t instr) -{ - /* Make sure we aren't patching a freed init section */ - if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4)) - return 0; - - return do_patch_instruction(addr, instr); -} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(u32 *addr, unsigned long target, int flags) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 31f40f544de5..80def1c2afcb 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -117,10 +117,64 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BARRIER_NOSPEC +static bool is_fixup_addr_valid(void *dest, size_t size) +{ + return system_state < SYSTEM_FREEING_INITMEM || + !init_section_contains(dest, size); +} + +static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num) +{ + int i; + + for (i = 0; start < end; start++, i++) { + int j; + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + for (j = 0; j < num; j++) + patch_instruction(dest + j, ppc_inst(instrs[j])); + } + return i; +} +#endif + #ifdef CONFIG_PPC_BOOK3S_64 +static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs, + bool do_fallback, void *fallback) +{ + int i; + + for (i = 0; start < end; start++, i++) { + unsigned int *dest = (void *)start + *start; + + if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3)) + continue; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + // See comment in do_entry_flush_fixups() RE order of patching + if (do_fallback) { + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK); + } else { + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + } + } + return i; +} + static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -144,23 +198,8 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - // See comment in do_entry_flush_fixups() RE order of patching - if (types & STF_BARRIER_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } + i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK, + &stf_barrier_fallback); printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : @@ -172,7 +211,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) { - unsigned int instrs[6], *dest; + unsigned int instrs[6]; long *start, *end; int i; @@ -206,18 +245,8 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest + 3, ppc_inst(instrs[3])); - patch_instruction(dest + 4, ppc_inst(instrs[4])); - patch_instruction(dest + 5, ppc_inst(instrs[5])); - } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : (types == STF_BARRIER_FALLBACK) ? "fallback" : @@ -274,7 +303,7 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) void do_uaccess_flush_fixups(enum l1d_flush_type types) { - unsigned int instrs[4], *dest; + unsigned int instrs[4]; long *start, *end; int i; @@ -300,17 +329,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest + 3, ppc_inst(instrs[3])); - } + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : @@ -325,7 +344,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) static int __do_entry_flush_fixups(void *data) { enum l1d_flush_type types = *(enum l1d_flush_type *)data; - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -375,42 +394,13 @@ static int __do_entry_flush_fixups(void *data) start = PTRRELOC(&__start___entry_flush_fixup); end = PTRRELOC(&__stop___entry_flush_fixup); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - if (types == L1D_FLUSH_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } + i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &entry_flush_fallback); start = PTRRELOC(&__start___scv_entry_flush_fixup); end = PTRRELOC(&__stop___scv_entry_flush_fixup); - for (; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - if (types == L1D_FLUSH_FALLBACK) { - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_branch(dest + 1, - (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); - } else { - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - patch_instruction(dest, ppc_inst(instrs[0])); - } - } - + i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK, + &scv_entry_flush_fallback); printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : @@ -438,7 +428,7 @@ void do_entry_flush_fixups(enum l1d_flush_type types) static int __do_rfi_flush_fixups(void *data) { enum l1d_flush_type types = *(enum l1d_flush_type *)data; - unsigned int instrs[3], *dest; + unsigned int instrs[3]; long *start, *end; int i; @@ -462,15 +452,7 @@ static int __do_rfi_flush_fixups(void *data) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - - patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction(dest + 1, ppc_inst(instrs[1])); - patch_instruction(dest + 2, ppc_inst(instrs[2])); - } + i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs)); printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : @@ -512,7 +494,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr, *dest; + unsigned int instr; long *start, *end; int i; @@ -526,12 +508,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, ppc_inst(instr)); - } + i = do_patch_fixups(start, end, &instr, 1); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } @@ -553,7 +530,7 @@ void do_barrier_nospec_fixups(bool enable) #ifdef CONFIG_PPC_E500 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { - unsigned int instr[2], *dest; + unsigned int instr[2]; long *start, *end; int i; @@ -569,13 +546,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ instr[1] = PPC_RAW_SYNC(); } - for (i = 0; start < end; start++, i++) { - dest = (void *)start + *start; - - pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, ppc_inst(instr[0])); - patch_instruction(dest + 1, ppc_inst(instr[1])); - } + i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr)); printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c new file mode 100644 index 000000000000..e4bd145255d0 --- /dev/null +++ b/arch/powerpc/lib/qspinlock.c @@ -0,0 +1,997 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/percpu.h> +#include <linux/processor.h> +#include <linux/smp.h> +#include <linux/topology.h> +#include <linux/sched/clock.h> +#include <asm/qspinlock.h> +#include <asm/paravirt.h> + +#define MAX_NODES 4 + +struct qnode { + struct qnode *next; + struct qspinlock *lock; + int cpu; + int yield_cpu; + u8 locked; /* 1 if lock acquired */ +}; + +struct qnodes { + int count; + struct qnode nodes[MAX_NODES]; +}; + +/* Tuning parameters */ +static int steal_spins __read_mostly = (1 << 5); +static int remote_steal_spins __read_mostly = (1 << 2); +#if _Q_SPIN_TRY_LOCK_STEAL == 1 +static const bool maybe_stealers = true; +#else +static bool maybe_stealers __read_mostly = true; +#endif +static int head_spins __read_mostly = (1 << 8); + +static bool pv_yield_owner __read_mostly = true; +static bool pv_yield_allow_steal __read_mostly = false; +static bool pv_spin_on_preempted_owner __read_mostly = false; +static bool pv_sleepy_lock __read_mostly = true; +static bool pv_sleepy_lock_sticky __read_mostly = false; +static u64 pv_sleepy_lock_interval_ns __read_mostly = 0; +static int pv_sleepy_lock_factor __read_mostly = 256; +static bool pv_yield_prev __read_mostly = true; +static bool pv_yield_propagate_owner __read_mostly = true; +static bool pv_prod_head __read_mostly = false; + +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes); +static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock); + +#if _Q_SPIN_SPEC_BARRIER == 1 +#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0) +#else +#define spec_barrier() do { } while (0) +#endif + +static __always_inline bool recently_sleepy(void) +{ + /* pv_sleepy_lock is true when this is called */ + if (pv_sleepy_lock_interval_ns) { + u64 seen = this_cpu_read(sleepy_lock_seen_clock); + + if (seen) { + u64 delta = sched_clock() - seen; + if (delta < pv_sleepy_lock_interval_ns) + return true; + this_cpu_write(sleepy_lock_seen_clock, 0); + } + } + + return false; +} + +static __always_inline int get_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return steal_spins * pv_sleepy_lock_factor; + else + return steal_spins; +} + +static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return remote_steal_spins * pv_sleepy_lock_factor; + else + return remote_steal_spins; +} + +static __always_inline int get_head_spins(bool paravirt, bool sleepy) +{ + if (paravirt && sleepy) + return head_spins * pv_sleepy_lock_factor; + else + return head_spins; +} + +static inline u32 encode_tail_cpu(int cpu) +{ + return (cpu + 1) << _Q_TAIL_CPU_OFFSET; +} + +static inline int decode_tail_cpu(u32 val) +{ + return (val >> _Q_TAIL_CPU_OFFSET) - 1; +} + +static inline int get_owner_cpu(u32 val) +{ + return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET; +} + +/* + * Try to acquire the lock if it was not already locked. If the tail matches + * mytail then clear it, otherwise leave it unchnaged. Return previous value. + * + * This is used by the head of the queue to acquire the lock and clean up + * its tail if it was the last one queued. + */ +static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail) +{ + u32 newval = queued_spin_encode_locked_val(); + u32 prev, tmp; + + asm volatile( +"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n" + /* This test is necessary if there could be stealers */ +" andi. %1,%0,%5 \n" +" bne 3f \n" + /* Test whether the lock tail == mytail */ +" and %1,%0,%6 \n" +" cmpw 0,%1,%3 \n" + /* Merge the new locked value */ +" or %1,%1,%4 \n" +" bne 2f \n" + /* If the lock tail matched, then clear it, otherwise leave it. */ +" andc %1,%1,%6 \n" +"2: stwcx. %1,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"3: \n" + : "=&r" (prev), "=&r" (tmp) + : "r" (&lock->val), "r"(tail), "r" (newval), + "i" (_Q_LOCKED_VAL), + "r" (_Q_TAIL_CPU_MASK), + "i" (_Q_SPIN_EH_HINT) + : "cr0", "memory"); + + return prev; +} + +/* + * Publish our tail, replacing previous tail. Return previous value. + * + * This provides a release barrier for publishing node, this pairs with the + * acquire barrier in get_tail_qnode() when the next CPU finds this tail + * value. + */ +static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail) +{ + u32 prev, tmp; + + asm volatile( +"\t" PPC_RELEASE_BARRIER " \n" +"1: lwarx %0,0,%2 # publish_tail_cpu \n" +" andc %1,%0,%4 \n" +" or %1,%1,%3 \n" +" stwcx. %1,0,%2 \n" +" bne- 1b \n" + : "=&r" (prev), "=&r"(tmp) + : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 set_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # set_mustq \n" +" or %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline u32 clear_mustq(struct qspinlock *lock) +{ + u32 prev; + + asm volatile( +"1: lwarx %0,0,%1 # clear_mustq \n" +" andc %0,%0,%2 \n" +" stwcx. %0,0,%1 \n" +" bne- 1b \n" + : "=&r" (prev) + : "r" (&lock->val), "r" (_Q_MUST_Q_VAL) + : "cr0", "memory"); + + return prev; +} + +static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old) +{ + u32 prev; + u32 new = old | _Q_SLEEPY_VAL; + + BUG_ON(!(old & _Q_LOCKED_VAL)); + BUG_ON(old & _Q_SLEEPY_VAL); + + asm volatile( +"1: lwarx %0,0,%1 # try_set_sleepy \n" +" cmpw 0,%0,%2 \n" +" bne- 2f \n" +" stwcx. %3,0,%1 \n" +" bne- 1b \n" +"2: \n" + : "=&r" (prev) + : "r" (&lock->val), "r"(old), "r" (new) + : "cr0", "memory"); + + return likely(prev == old); +} + +static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } +} + +static __always_inline void seen_sleepy_lock(void) +{ + if (pv_sleepy_lock && pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); +} + +static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val) +{ + if (pv_sleepy_lock) { + if (pv_sleepy_lock_interval_ns) + this_cpu_write(sleepy_lock_seen_clock, sched_clock()); + if (val & _Q_LOCKED_VAL) { + if (!(val & _Q_SLEEPY_VAL)) + try_set_sleepy(lock, val); + } + } +} + +static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val) +{ + int cpu = decode_tail_cpu(val); + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu); + int idx; + + /* + * After publishing the new tail and finding a previous tail in the + * previous val (which is the control dependency), this barrier + * orders the release barrier in publish_tail_cpu performed by the + * last CPU, with subsequently looking at its qnode structures + * after the barrier. + */ + smp_acquire__after_ctrl_dep(); + + for (idx = 0; idx < MAX_NODES; idx++) { + struct qnode *qnode = &qnodesp->nodes[idx]; + if (qnode->lock == lock) + return qnode; + } + + BUG(); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq) +{ + int owner; + u32 yield_count; + bool preempted = false; + + BUG_ON(!(val & _Q_LOCKED_VAL)); + + if (!paravirt) + goto relax; + + if (!pv_yield_owner) + goto relax; + + owner = get_owner_cpu(val); + yield_count = yield_count_of(owner); + + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + seen_sleepy_owner(lock, val); + preempted = true; + + /* + * Read the lock word after sampling the yield count. On the other side + * there may a wmb because the yield count update is done by the + * hypervisor preemption and the value update by the OS, however this + * ordering might reduce the chance of out of order accesses and + * improve the heuristic. + */ + smp_rmb(); + + if (READ_ONCE(lock->val) == val) { + if (mustq) + clear_mustq(lock); + yield_to_preempted(owner, yield_count); + if (mustq) + set_mustq(lock); + spin_begin(); + + /* Don't relax if we yielded. Maybe we should? */ + return preempted; + } + spin_begin(); +relax: + spin_cpu_relax(); + + return preempted; +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + return __yield_to_locked_owner(lock, val, paravirt, false); +} + +/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */ +static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt) +{ + bool mustq = false; + + if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal) + mustq = true; + + return __yield_to_locked_owner(lock, val, paravirt, mustq); +} + +static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt) +{ + struct qnode *next; + int owner; + + if (!paravirt) + return; + if (!pv_yield_propagate_owner) + return; + + owner = get_owner_cpu(val); + if (*set_yield_cpu == owner) + return; + + next = READ_ONCE(node->next); + if (!next) + return; + + if (vcpu_is_preempted(owner)) { + next->yield_cpu = owner; + *set_yield_cpu = owner; + } else if (*set_yield_cpu != -1) { + next->yield_cpu = owner; + *set_yield_cpu = owner; + } +} + +/* Called inside spin_begin() */ +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt) +{ + int prev_cpu = decode_tail_cpu(val); + u32 yield_count; + int yield_cpu; + bool preempted = false; + + if (!paravirt) + goto relax; + + if (!pv_yield_propagate_owner) + goto yield_prev; + + yield_cpu = READ_ONCE(node->yield_cpu); + if (yield_cpu == -1) { + /* Propagate back the -1 CPU */ + if (node->next && node->next->yield_cpu != -1) + node->next->yield_cpu = yield_cpu; + goto yield_prev; + } + + yield_count = yield_count_of(yield_cpu); + if ((yield_count & 1) == 0) + goto yield_prev; /* owner vcpu is running */ + + spin_end(); + + preempted = true; + seen_sleepy_node(lock, val); + + smp_rmb(); + + if (yield_cpu == node->yield_cpu) { + if (node->next && node->next->yield_cpu != yield_cpu) + node->next->yield_cpu = yield_cpu; + yield_to_preempted(yield_cpu, yield_count); + spin_begin(); + return preempted; + } + spin_begin(); + +yield_prev: + if (!pv_yield_prev) + goto relax; + + yield_count = yield_count_of(prev_cpu); + if ((yield_count & 1) == 0) + goto relax; /* owner vcpu is running */ + + spin_end(); + + preempted = true; + seen_sleepy_node(lock, val); + + smp_rmb(); /* See __yield_to_locked_owner comment */ + + if (!node->locked) { + yield_to_preempted(prev_cpu, yield_count); + spin_begin(); + return preempted; + } + spin_begin(); + +relax: + spin_cpu_relax(); + + return preempted; +} + +static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy) +{ + if (iters >= get_steal_spins(paravirt, sleepy)) + return true; + + if (IS_ENABLED(CONFIG_NUMA) && + (iters >= get_remote_steal_spins(paravirt, sleepy))) { + int cpu = get_owner_cpu(val); + if (numa_node_id() != cpu_to_node(cpu)) + return true; + } + return false; +} + +static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt) +{ + bool seen_preempted = false; + bool sleepy = false; + int iters = 0; + u32 val; + + if (!steal_spins) { + /* XXX: should spin_on_preempted_owner do anything here? */ + return false; + } + + /* Attempt to steal the lock */ + spin_begin(); + do { + bool preempted = false; + + val = READ_ONCE(lock->val); + if (val & _Q_MUST_Q_VAL) + break; + spec_barrier(); + + if (unlikely(!(val & _Q_LOCKED_VAL))) { + spin_end(); + if (__queued_spin_trylock_steal(lock)) + return true; + spin_begin(); + } else { + preempted = yield_to_locked_owner(lock, val, paravirt); + } + + if (paravirt && pv_sleepy_lock) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + if (preempted) { + seen_preempted = true; + sleepy = true; + if (!pv_spin_on_preempted_owner) + iters++; + /* + * pv_spin_on_preempted_owner don't increase iters + * while the owner is preempted -- we won't interfere + * with it by definition. This could introduce some + * latency issue if we continually observe preempted + * owners, but hopefully that's a rare corner case of + * a badly oversubscribed system. + */ + } else { + iters++; + } + } while (!steal_break(val, iters, paravirt, sleepy)); + + spin_end(); + + return false; +} + +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt) +{ + struct qnodes *qnodesp; + struct qnode *next, *node; + u32 val, old, tail; + bool seen_preempted = false; + bool sleepy = false; + bool mustq = false; + int idx; + int set_yield_cpu = -1; + int iters = 0; + + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); + + qnodesp = this_cpu_ptr(&qnodes); + if (unlikely(qnodesp->count >= MAX_NODES)) { + spec_barrier(); + while (!queued_spin_trylock(lock)) + cpu_relax(); + return; + } + + idx = qnodesp->count++; + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. + */ + barrier(); + node = &qnodesp->nodes[idx]; + node->next = NULL; + node->lock = lock; + node->cpu = smp_processor_id(); + node->yield_cpu = -1; + node->locked = 0; + + tail = encode_tail_cpu(node->cpu); + + old = publish_tail_cpu(lock, tail); + + /* + * If there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_CPU_MASK) { + struct qnode *prev = get_tail_qnode(lock, old); + + /* Link @node into the waitqueue. */ + WRITE_ONCE(prev->next, node); + + /* Wait for mcs node lock to be released */ + spin_begin(); + while (!node->locked) { + spec_barrier(); + + if (yield_to_prev(lock, node, old, paravirt)) + seen_preempted = true; + } + spec_barrier(); + spin_end(); + + /* Clear out stale propagated yield_cpu */ + if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1) + node->yield_cpu = -1; + + smp_rmb(); /* acquire barrier for the mcs lock */ + + /* + * Generic qspinlocks have this prefetch here, but it seems + * like it could cause additional line transitions because + * the waiter will keep loading from it. + */ + if (_Q_SPIN_PREFETCH_NEXT) { + next = READ_ONCE(node->next); + if (next) + prefetchw(next); + } + } + + /* We're at the head of the waitqueue, wait for the lock. */ +again: + spin_begin(); + for (;;) { + bool preempted; + + val = READ_ONCE(lock->val); + if (!(val & _Q_LOCKED_VAL)) + break; + spec_barrier(); + + if (paravirt && pv_sleepy_lock && maybe_stealers) { + if (!sleepy) { + if (val & _Q_SLEEPY_VAL) { + seen_sleepy_lock(); + sleepy = true; + } else if (recently_sleepy()) { + sleepy = true; + } + } + if (pv_sleepy_lock_sticky && seen_preempted && + !(val & _Q_SLEEPY_VAL)) { + if (try_set_sleepy(lock, val)) + val |= _Q_SLEEPY_VAL; + } + } + + propagate_yield_cpu(node, val, &set_yield_cpu, paravirt); + preempted = yield_head_to_locked_owner(lock, val, paravirt); + if (!maybe_stealers) + continue; + + if (preempted) + seen_preempted = true; + + if (paravirt && preempted) { + sleepy = true; + + if (!pv_spin_on_preempted_owner) + iters++; + } else { + iters++; + } + + if (!mustq && iters >= get_head_spins(paravirt, sleepy)) { + mustq = true; + set_mustq(lock); + val |= _Q_MUST_Q_VAL; + } + } + spec_barrier(); + spin_end(); + + /* If we're the last queued, must clean up the tail. */ + old = trylock_clean_tail(lock, tail); + if (unlikely(old & _Q_LOCKED_VAL)) { + BUG_ON(!maybe_stealers); + goto again; /* Can only be true if maybe_stealers. */ + } + + if ((old & _Q_TAIL_CPU_MASK) == tail) + goto release; /* We were the tail, no next. */ + + /* There is a next, must wait for node->next != NULL (MCS protocol) */ + next = READ_ONCE(node->next); + if (!next) { + spin_begin(); + while (!(next = READ_ONCE(node->next))) + cpu_relax(); + spin_end(); + } + spec_barrier(); + + /* + * Unlock the next mcs waiter node. Release barrier is not required + * here because the acquirer is only accessing the lock word, and + * the acquire barrier we took the lock with orders that update vs + * this store to locked. The corresponding barrier is the smp_rmb() + * acquire barrier for mcs lock, above. + */ + if (paravirt && pv_prod_head) { + int next_cpu = next->cpu; + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + if (vcpu_is_preempted(next_cpu)) + prod_cpu(next_cpu); + } else { + WRITE_ONCE(next->locked, 1); + if (_Q_SPIN_MISO) + asm volatile("miso" ::: "memory"); + } + +release: + qnodesp->count--; /* release the node */ +} + +void queued_spin_lock_slowpath(struct qspinlock *lock) +{ + /* + * This looks funny, but it induces the compiler to inline both + * sides of the branch rather than share code as when the condition + * is passed as the paravirt argument to the functions. + */ + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) { + if (try_to_steal_lock(lock, true)) { + spec_barrier(); + return; + } + queued_spin_lock_mcs_queue(lock, true); + } else { + if (try_to_steal_lock(lock, false)) { + spec_barrier(); + return; + } + queued_spin_lock_mcs_queue(lock, false); + } +} +EXPORT_SYMBOL(queued_spin_lock_slowpath); + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void) +{ +} +#endif + +#include <linux/debugfs.h> +static int steal_spins_set(void *data, u64 val) +{ +#if _Q_SPIN_TRY_LOCK_STEAL == 1 + /* MAYBE_STEAL remains true */ + steal_spins = val; +#else + static DEFINE_MUTEX(lock); + + /* + * The lock slow path has a !maybe_stealers case that can assume + * the head of queue will not see concurrent waiters. That waiter + * is unsafe in the presence of stealers, so must keep them away + * from one another. + */ + + mutex_lock(&lock); + if (val && !steal_spins) { + maybe_stealers = true; + /* wait for queue head waiter to go away */ + synchronize_rcu(); + steal_spins = val; + } else if (!val && steal_spins) { + steal_spins = val; + /* wait for all possible stealers to go away */ + synchronize_rcu(); + maybe_stealers = false; + } else { + steal_spins = val; + } + mutex_unlock(&lock); +#endif + + return 0; +} + +static int steal_spins_get(void *data, u64 *val) +{ + *val = steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n"); + +static int remote_steal_spins_set(void *data, u64 val) +{ + remote_steal_spins = val; + + return 0; +} + +static int remote_steal_spins_get(void *data, u64 *val) +{ + *val = remote_steal_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n"); + +static int head_spins_set(void *data, u64 val) +{ + head_spins = val; + + return 0; +} + +static int head_spins_get(void *data, u64 *val) +{ + *val = head_spins; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n"); + +static int pv_yield_owner_set(void *data, u64 val) +{ + pv_yield_owner = !!val; + + return 0; +} + +static int pv_yield_owner_get(void *data, u64 *val) +{ + *val = pv_yield_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n"); + +static int pv_yield_allow_steal_set(void *data, u64 val) +{ + pv_yield_allow_steal = !!val; + + return 0; +} + +static int pv_yield_allow_steal_get(void *data, u64 *val) +{ + *val = pv_yield_allow_steal; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n"); + +static int pv_spin_on_preempted_owner_set(void *data, u64 val) +{ + pv_spin_on_preempted_owner = !!val; + + return 0; +} + +static int pv_spin_on_preempted_owner_get(void *data, u64 *val) +{ + *val = pv_spin_on_preempted_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n"); + +static int pv_sleepy_lock_set(void *data, u64 val) +{ + pv_sleepy_lock = !!val; + + return 0; +} + +static int pv_sleepy_lock_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n"); + +static int pv_sleepy_lock_sticky_set(void *data, u64 val) +{ + pv_sleepy_lock_sticky = !!val; + + return 0; +} + +static int pv_sleepy_lock_sticky_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_sticky; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n"); + +static int pv_sleepy_lock_interval_ns_set(void *data, u64 val) +{ + pv_sleepy_lock_interval_ns = val; + + return 0; +} + +static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_interval_ns; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n"); + +static int pv_sleepy_lock_factor_set(void *data, u64 val) +{ + pv_sleepy_lock_factor = val; + + return 0; +} + +static int pv_sleepy_lock_factor_get(void *data, u64 *val) +{ + *val = pv_sleepy_lock_factor; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n"); + +static int pv_yield_prev_set(void *data, u64 val) +{ + pv_yield_prev = !!val; + + return 0; +} + +static int pv_yield_prev_get(void *data, u64 *val) +{ + *val = pv_yield_prev; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n"); + +static int pv_yield_propagate_owner_set(void *data, u64 val) +{ + pv_yield_propagate_owner = !!val; + + return 0; +} + +static int pv_yield_propagate_owner_get(void *data, u64 *val) +{ + *val = pv_yield_propagate_owner; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n"); + +static int pv_prod_head_set(void *data, u64 val) +{ + pv_prod_head = !!val; + + return 0; +} + +static int pv_prod_head_get(void *data, u64 *val) +{ + *val = pv_prod_head; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n"); + +static __init int spinlock_debugfs_init(void) +{ + debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins); + debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins); + debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins); + if (is_shared_processor()) { + debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner); + debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal); + debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner); + debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); + debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); + debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); + debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); + debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); + debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); + debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); + } + + return 0; +} +device_initcall(spinlock_debugfs_init); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 398b5694aeb7..38158b77a801 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2284,15 +2284,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(STCX, 0, 4); break; -#ifdef __powerpc64__ - case 84: /* ldarx */ - op->type = MKOP(LARX, 0, 8); - break; - - case 214: /* stdcx. */ - op->type = MKOP(STCX, 0, 8); - break; - +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 52: /* lbarx */ op->type = MKOP(LARX, 0, 1); break; @@ -2308,6 +2300,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 726: /* sthcx. */ op->type = MKOP(STCX, 0, 2); break; +#endif +#ifdef __powerpc64__ + case 84: /* ldarx */ + op->type = MKOP(LARX, 0, 8); + break; + + case 214: /* stdcx. */ + op->type = MKOP(STCX, 0, 8); + break; case 276: /* lqarx */ if (!((rd & 1) || rd == ra || rd == rb)) @@ -3334,7 +3335,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) err = 0; val = 0; switch (size) { -#ifdef __powerpc64__ +#ifdef CONFIG_PPC_HAS_LBARX_LHARX case 1: __get_user_asmx(val, ea, err, "lbarx"); break; diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 5473f9d03df3..e2b646a4f7fa 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -16,7 +16,7 @@ _GLOBAL(exec_instr) /* * Stack frame layout (INT_FRAME_SIZE bytes) - * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD) + * In-memory pt_regs (SP + STACK_INT_FRAME_REGS) * Scratch space (SP + 8) * Back chain (SP + 0) */ diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 7de1a8a0c62a..02acbfd05b46 100644 --- a/arch/powerpc/mm/book3s64/hash_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -16,6 +16,8 @@ #include <asm/machdep.h> #include <asm/mmu.h> +#include "internal.h" + int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpg_prot) @@ -118,6 +120,9 @@ repeat: } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); + + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 998c6817ed47..954af420f358 100644 --- a/arch/powerpc/mm/book3s64/hash_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -16,6 +16,8 @@ #include <asm/machdep.h> #include <asm/mmu.h> +#include "internal.h" + /* * Return true, if the entry has a slot value which * the software considers as invalid. @@ -216,6 +218,9 @@ repeat: new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE); new_pte |= H_PAGE_HASHPTE; + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } @@ -327,7 +332,12 @@ repeat: new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE); + + if (stress_hpt()) + hpt_do_stress(ea, hpte_group); } + *ptep = __pte(new_pte & ~H_PAGE_BUSY); + return 0; } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6df4c6d38b66..80a148c57de8 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } -static bool disable_1tb_segments = false; +static bool disable_1tb_segments __ro_after_init; static int __init parse_disable_1tb_segments(char *p) { @@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p) } early_param("disable_1tb_segments", parse_disable_1tb_segments); +bool stress_hpt_enabled __initdata; + +static int __init parse_stress_hpt(char *p) +{ + stress_hpt_enabled = true; + return 0; +} +early_param("stress_hpt", parse_stress_hpt); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key); + +/* + * per-CPU array allocated if we enable stress_hpt. + */ +#define STRESS_MAX_GROUPS 16 +struct stress_hpt_struct { + unsigned long last_group[STRESS_MAX_GROUPS]; +}; + +static inline int stress_nr_groups(void) +{ + /* + * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries + * to allow practical forward progress. Bare metal returns 1, which + * seems to help uncover more bugs. + */ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return STRESS_MAX_GROUPS; + else + return 1; +} + +static struct stress_hpt_struct *stress_hpt_struct; + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, pr_info("Partition table %p\n", partition_tb); } +void hpt_clear_stress(void); +static struct timer_list stress_hpt_timer; +void stress_hpt_timer_fn(struct timer_list *timer) +{ + int next_cpu; + + hpt_clear_stress(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) + tlbiel_all(); + + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer_on(&stress_hpt_timer, next_cpu); +} + static void __init htab_initialize(void) { unsigned long table; @@ -995,6 +1046,20 @@ static void __init htab_initialize(void) if (stress_slb_enabled) static_branch_enable(&stress_slb_key); + if (stress_hpt_enabled) { + unsigned long tmp; + static_branch_enable(&stress_hpt_key); + // Too early to use nr_cpu_ids, so use NR_CPUS + tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS, + 0, 0, MEMBLOCK_ALLOC_ANYWHERE); + memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS); + stress_hpt_struct = __va(tmp); + + timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0); + stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer(&stress_hpt_timer); + } + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. @@ -1980,6 +2045,69 @@ repeat: return slot; } +void hpt_clear_stress(void) +{ + int cpu = raw_smp_processor_id(); + int g; + + for (g = 0; g < stress_nr_groups(); g++) { + unsigned long last_group; + last_group = stress_hpt_struct[cpu].last_group[g]; + + if (last_group != -1UL) { + int i; + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[g] = -1; + } + } +} + +void hpt_do_stress(unsigned long ea, unsigned long hpte_group) +{ + unsigned long last_group; + int cpu = raw_smp_processor_id(); + + last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1]; + if (hpte_group == last_group) + return; + + if (last_group != -1UL) { + int i; + /* + * Concurrent CPUs might be inserting into this group, so + * give up after a number of iterations, to prevent a live + * lock. + */ + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (mmu_hash_ops.hpte_remove(last_group) == -1) + break; + } + stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1; + } + + if (ea >= PAGE_OFFSET) { + /* + * We would really like to prefetch to get the TLB loaded, then + * remove the PTE before returning from fault interrupt, to + * increase the hash fault rate. + * + * Unfortunately QEMU TCG does not model the TLB in a way that + * makes this possible, and systemsim (mambo) emulator does not + * bring in TLBs with prefetches (although loads/stores do + * work for non-CI PTEs). + * + * So remember this PTE and clear it on the next hash fault. + */ + memmove(&stress_hpt_struct[cpu].last_group[1], + &stress_hpt_struct[cpu].last_group[0], + (stress_nr_groups() - 1) * sizeof(unsigned long)); + stress_hpt_struct[cpu].last_group[0] = hpte_group; + } +} + #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index 5045048ce244..a57a25f06a21 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -13,6 +13,17 @@ static inline bool stress_slb(void) return static_branch_unlikely(&stress_slb_key); } +extern bool stress_hpt_enabled; + +DECLARE_STATIC_KEY_FALSE(stress_hpt_key); + +static inline bool stress_hpt(void) +{ + return static_branch_unlikely(&stress_hpt_key); +} + +void hpt_do_stress(unsigned long ea, unsigned long hpte_group); + void slb_setup_new_exec(void); void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index f6151a589298..85c84e89e3ea 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -100,14 +100,14 @@ static void do_serialize(void *arg) } /* - * Serialize against find_current_mm_pte which does lock-less + * Serialize against __find_linux_pte() which does lock-less * lookup in page tables with local interrupts disabled. For huge pages * it casts pmd_t to pte_t. Since format of pte_t is different from * pmd_t we want to prevent transit from pmd pointing to page table * to pmd pointing to huge page (and back) while interrupts are disabled. * We clear pmd to possibly replace it with page table pointer in * different code paths. So make sure we wait for the parallel - * find_current_mm_pte to finish. + * __find_linux_pte() to finish. */ void serialize_against_pte_lookup(struct mm_struct *mm) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 84d171953ba4..8b121df7b08f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -344,7 +344,6 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); - static_branch_enable(&init_mem_is_free); free_initmem_default(POISON_FREE_INITMEM); ftrace_free_init_tramp(); } diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 0d04f9d5da8d..2fb3edafe9ab 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -19,7 +19,6 @@ #include <asm/cacheflush.h> #include <asm/kdump.h> #include <mm/mmu_decl.h> -#include <generated/utsrelease.h> struct regions { unsigned long pa_start; diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 2c15c86c7015..a903b308acc5 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -184,6 +184,14 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) mmu_get_tsize(mmu_virtual_psize), 0); } EXPORT_SYMBOL(local_flush_tlb_page); + +void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + __local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0); +} +EXPORT_SYMBOL(local_flush_tlb_page_psize); + #endif /* diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 082f6d0308a4..6b4434dd0ff3 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -27,7 +27,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) { if (sp & 0xf) return 0; /* must be 16-byte aligned */ - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, current)) return 0; if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) return 1; @@ -53,7 +53,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re sp = regs->gpr[1]; perf_callchain_store(entry, perf_instruction_pointer(regs)); - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, current)) return; for (;;) { @@ -61,12 +61,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re next_sp = fp[0]; if (next_sp == sp + STACK_INT_FRAME_SIZE && - fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) && + fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { /* * This looks like an interrupt frame for an * interrupt that occurred in the kernel */ - regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); next_ip = regs->nip; lr = regs->link; level = 0; diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index 8965b4463d43..5e86371a20c7 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id) ) #include I(REQUEST_END) +#ifdef ENABLE_EVENTS_COUNTERINFO_V6 /* * Not available for counter_info_version >= 0x8, use * run_instruction_cycles_by_partition(0x100) instead. @@ -92,6 +93,7 @@ REQUEST(__field(0, 8, partition_id) __count(0x10, 8, cycles) ) #include I(REQUEST_END) +#endif #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 @@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged) ) #include I(REQUEST_END) +#ifdef ENABLE_EVENTS_COUNTERINFO_V6 #define REQUEST_NAME processor_bus_utilization_abc_links #define REQUEST_NUM 0x50 #define REQUEST_IDX_KIND "hw_chip_id=?" @@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx) __count(0x28, 8, instructions_completed) ) #include I(REQUEST_END) +#endif /* Processor_core_power_mode (0x95) skipped, no counters */ /* Affinity_domain_information_by_virtual_processor (0xA0) skipped, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 5eb60ed5b5e8..7ff8ff3509f5 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -70,9 +70,9 @@ static const struct attribute_group format_group = { .attrs = format_attrs, }; -static const struct attribute_group event_group = { +static struct attribute_group event_group = { .name = "events", - .attrs = hv_gpci_event_attrs, + /* .attrs is set in init */ }; #define HV_CAPS_ATTR(_name, _format) \ @@ -330,6 +330,7 @@ static int hv_gpci_init(void) int r; unsigned long hret; struct hv_perf_caps caps; + struct hv_gpci_request_buffer *arg; hv_gpci_assert_offsets_correct(); @@ -353,6 +354,36 @@ static int hv_gpci_init(void) /* sampling not supported */ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the output + * counter_info_version value based on the system hypervisor. + * Pass the counter request 0x10 corresponds to request type + * 'Dispatch_timebase_by_processor', to get the supported + * counter_info_version. + */ + arg->params.counter_request = cpu_to_be32(0x10); + + r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + if (r) { + pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); + arg->params.counter_info_version_out = 0x8; + } + + /* + * Use counter_info_version_out value to assign + * required hv-gpci event list. + */ + if (arg->params.counter_info_version_out >= 0x8) + event_group.attrs = hv_gpci_event_attrs; + else + event_group.attrs = hv_gpci_event_attrs_v6; + + put_cpu_var(hv_gpci_reqb); + r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index 4d108262bed7..c72020912dea 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -26,6 +26,7 @@ enum { #define REQUEST_FILE "../hv-gpci-requests.h" #define NAME_LOWER hv_gpci #define NAME_UPPER HV_GPCI +#define ENABLE_EVENTS_COUNTERINFO_V6 #include "req-gen/perf.h" #undef REQUEST_FILE #undef NAME_LOWER diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h index fa9bc804e67a..6b2a59fefffa 100644 --- a/arch/powerpc/perf/req-gen/perf.h +++ b/arch/powerpc/perf/req-gen/perf.h @@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \ #define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ r_fields +/* Generate event list for platforms with counter_info_version 0x6 or below */ +static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = { +#include REQUEST_FILE + NULL +}; + +/* + * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci + * events were deprecated for platform firmware that supports + * counter_info_version 0x8 or above. + * Those deprecated events are still part of platform firmware that + * support counter_info_version 0x6 and below. As per the getPerfCountInfo + * v1.018 documentation there is no counter_info_version 0x7. + * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of + * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms + * that supports counter_info_version 0x8 or above. + */ +#undef ENABLE_EVENTS_COUNTERINFO_V6 + +/* Generate event list for platforms with counter_info_version 0x8 or above*/ static __maybe_unused struct attribute *hv_gpci_event_attrs[] = { #include REQUEST_FILE NULL diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index f03432ef010b..cefa313c09f0 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -5,15 +5,17 @@ * Copyright (c) 2008-2009 PIKA Technologies * Sean MacLennan <smaclennan@pikatech.com> */ +#include <linux/err.h> #include <linux/init.h> #include <linux/of_platform.h> #include <linux/kthread.h> +#include <linux/leds.h> #include <linux/i2c.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/of_address.h> #include <linux/of_irq.h> -#include <linux/of_gpio.h> +#include <linux/gpio/consumer.h> #include <linux/slab.h> #include <linux/export.h> @@ -92,8 +94,6 @@ static int __init warp_post_info(void) static LIST_HEAD(dtm_shutdown_list); static void __iomem *dtm_fpga; -static unsigned green_led, red_led; - struct dtm_shutdown { struct list_head list; @@ -101,7 +101,6 @@ struct dtm_shutdown { void *arg; }; - int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) { struct dtm_shutdown *shutdown; @@ -132,6 +131,35 @@ int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) return -EINVAL; } +#define WARP_GREEN_LED 0 +#define WARP_RED_LED 1 + +static struct gpio_led warp_gpio_led_pins[] = { + [WARP_GREEN_LED] = { + .name = "green", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, + [WARP_RED_LED] = { + .name = "red", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, +}; + +static struct gpio_led_platform_data warp_gpio_led_data = { + .leds = warp_gpio_led_pins, + .num_leds = ARRAY_SIZE(warp_gpio_led_pins), +}; + +static struct platform_device warp_gpio_leds = { + .name = "leds-gpio", + .id = -1, + .dev = { + .platform_data = &warp_gpio_led_data, + }, +}; + static irqreturn_t temp_isr(int irq, void *context) { struct dtm_shutdown *shutdown; @@ -139,7 +167,7 @@ static irqreturn_t temp_isr(int irq, void *context) local_irq_disable(); - gpio_set_value(green_led, 0); + gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0); /* Run through the shutdown list. */ list_for_each_entry(shutdown, &dtm_shutdown_list, list) @@ -153,7 +181,7 @@ static irqreturn_t temp_isr(int irq, void *context) out_be32(dtm_fpga + 0x14, reset); } - gpio_set_value(red_led, value); + gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value); value ^= 1; mdelay(500); } @@ -162,25 +190,78 @@ static irqreturn_t temp_isr(int irq, void *context) return IRQ_HANDLED; } +/* + * Because green and red power LEDs are normally driven by leds-gpio driver, + * but in case of critical temperature shutdown we want to drive them + * ourselves, we acquire both and then create leds-gpio platform device + * ourselves, instead of doing it through device tree. This way we can still + * keep access to the gpios and use them when needed. + */ static int pika_setup_leds(void) { struct device_node *np, *child; + struct gpio_desc *gpio; + struct gpio_led *led; + int led_count = 0; + int error; + int i; - np = of_find_compatible_node(NULL, NULL, "gpio-leds"); + np = of_find_compatible_node(NULL, NULL, "warp-power-leds"); if (!np) { printk(KERN_ERR __FILE__ ": Unable to find leds\n"); return -ENOENT; } - for_each_child_of_node(np, child) - if (of_node_name_eq(child, "green")) - green_led = of_get_gpio(child, 0); - else if (of_node_name_eq(child, "red")) - red_led = of_get_gpio(child, 0); + for_each_child_of_node(np, child) { + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + + if (!of_node_name_eq(child, led->name)) + continue; + + if (led->gpiod) { + printk(KERN_ERR __FILE__ ": %s led has already been defined\n", + led->name); + continue; + } + + gpio = fwnode_gpiod_get_index(of_fwnode_handle(child), + NULL, 0, GPIOD_ASIS, + led->name); + error = PTR_ERR_OR_ZERO(gpio); + if (error) { + printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n", + led->name, error); + of_node_put(child); + goto err_cleanup_pins; + } + + led->gpiod = gpio; + led_count++; + } + } of_node_put(np); + /* Skip device registration if no leds have been defined */ + if (led_count) { + error = platform_device_register(&warp_gpio_leds); + if (error) { + printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n", + error); + goto err_cleanup_pins; + } + } + return 0; + +err_cleanup_pins: + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + gpiod_put(led->gpiod); + led->gpiod = NULL; + } + return error; } static void pika_setup_critical_temp(struct device_node *np, diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index afee8b1515a8..0b12647e7b42 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> + #include <asm/reg.h> #include <asm/ppc_asm.h> #include <asm/processor.h> @@ -178,7 +180,8 @@ sram_code: /* local udelay in sram is needed */ - udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ +SYM_FUNC_START_LOCAL(udelay) + /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ add r12, r13, r12 /* end */ @@ -187,6 +190,7 @@ sram_code: cmp cr0, r13, r12 blt 1b blr +SYM_FUNC_END(udelay) sram_code_end: @@ -271,7 +275,7 @@ _ASM_NOKPROBE_SYMBOL(lite5200_wakeup) SAVE_SR(n+2, addr+2); \ SAVE_SR(n+3, addr+3); -save_regs: +SYM_FUNC_START_LOCAL(save_regs) stw r0, 0(r4) stw r1, 0x4(r4) stw r2, 0x8(r4) @@ -317,6 +321,7 @@ save_regs: SAVE_SPRN(TBRU, 0x5b) blr +SYM_FUNC_END(save_regs) /* restore registers */ @@ -336,7 +341,7 @@ save_regs: LOAD_SR(n+2, addr+2); \ LOAD_SR(n+3, addr+3); -restore_regs: +SYM_FUNC_START_LOCAL(restore_regs) lis r4, registers@h ori r4, r4, registers@l @@ -393,6 +398,7 @@ restore_regs: blr _ASM_NOKPROBE_SYMBOL(restore_regs) +SYM_FUNC_END(restore_regs) @@ -403,7 +409,7 @@ _ASM_NOKPROBE_SYMBOL(restore_regs) * Flush data cache * Do this by just reading lots of stuff into the cache. */ -flush_data_cache: +SYM_FUNC_START_LOCAL(flush_data_cache) lis r3,CONFIG_KERNEL_START@h ori r3,r3,CONFIG_KERNEL_START@l li r4,NUM_CACHE_LINES @@ -413,3 +419,4 @@ flush_data_cache: addi r3,r3,L1_CACHE_BYTES /* Next line, please */ bdnz 1b blr +SYM_FUNC_END(flush_data_cache) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 48038aaedbd3..6d1dd6e87478 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -59,6 +59,8 @@ static struct mpc52xx_lpbfifo lpbfifo; /** * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred + * + * @req: Pointer to request structure */ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) { @@ -178,6 +180,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) /** * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO + * @irq: IRQ number to be handled + * @dev_id: device ID cookie * * On transmit, the dma completion irq triggers before the fifo completion * triggers. Handle the dma completion here instead of the LPB FIFO Bestcomm @@ -216,6 +220,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) * or nested spinlock condition. The out path is non-trivial, so * extra fiddling is done to make sure all paths lead to the same * outbound code. + * + * Return: irqreturn code (%IRQ_HANDLED) */ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) { @@ -320,8 +326,12 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) /** * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task + * @irq: IRQ number to be handled + * @dev_id: device ID cookie * * Only used when receiving data. + * + * Return: irqreturn code (%IRQ_HANDLED) */ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) { @@ -372,7 +382,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) } /** - * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion + * mpc52xx_lpbfifo_poll - Poll for DMA completion */ void mpc52xx_lpbfifo_poll(void) { @@ -393,6 +403,8 @@ EXPORT_SYMBOL(mpc52xx_lpbfifo_poll); /** * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request. * @req: Pointer to request structure + * + * Return: %0 on success, -errno code on error */ int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req) { @@ -531,6 +543,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op) err_bcom_rx_irq: bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task); err_bcom_rx: + free_irq(lpbfifo.irq, &lpbfifo); err_irq: iounmap(lpbfifo.regs); lpbfifo.regs = NULL; diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index e12cb44e717f..caa96edf0e72 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -107,7 +107,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto next; unreg: - platform_device_del(pdev); + platform_device_put(pdev); err: pr_err("%pOF: registration failed\n", np); next: diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index e14d1b74d4e4..751395cbf022 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -7,10 +7,13 @@ * Copyright 2012 by Servergy, Inc. */ +#define pr_fmt(fmt) "gpio-halt: " fmt + +#include <linux/err.h> #include <linux/platform_device.h> #include <linux/device.h> +#include <linux/gpio/consumer.h> #include <linux/module.h> -#include <linux/of_gpio.h> #include <linux/of_irq.h> #include <linux/workqueue.h> #include <linux/reboot.h> @@ -18,7 +21,8 @@ #include <asm/machdep.h> -static struct device_node *halt_node; +static struct gpio_desc *halt_gpio; +static int halt_irq; static const struct of_device_id child_match[] = { { @@ -36,23 +40,10 @@ static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn); static void __noreturn gpio_halt_cb(void) { - enum of_gpio_flags flags; - int trigger, gpio; - - if (!halt_node) - panic("No reset GPIO information was provided in DT\n"); - - gpio = of_get_gpio_flags(halt_node, 0, &flags); - - if (!gpio_is_valid(gpio)) - panic("Provided GPIO is invalid\n"); - - trigger = (flags == OF_GPIO_ACTIVE_LOW); - - printk(KERN_INFO "gpio-halt: triggering GPIO.\n"); + pr_info("triggering GPIO.\n"); /* Probably wont return */ - gpio_set_value(gpio, trigger); + gpiod_set_value(halt_gpio, 1); panic("Halt failed\n"); } @@ -61,95 +52,78 @@ static void __noreturn gpio_halt_cb(void) * to handle the shutdown/poweroff. */ static irqreturn_t gpio_halt_irq(int irq, void *__data) { - printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n"); + struct platform_device *pdev = __data; + + dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n"); schedule_work(&gpio_halt_wq); return IRQ_HANDLED; }; -static int gpio_halt_probe(struct platform_device *pdev) +static int __gpio_halt_probe(struct platform_device *pdev, + struct device_node *halt_node) { - enum of_gpio_flags flags; - struct device_node *node = pdev->dev.of_node; - struct device_node *child_node; - int gpio, err, irq; - int trigger; - - if (!node) - return -ENODEV; - - /* If there's no matching child, this isn't really an error */ - child_node = of_find_matching_node(node, child_match); - if (!child_node) - return 0; - - /* Technically we could just read the first one, but punish - * DT writers for invalid form. */ - if (of_gpio_count(child_node) != 1) { - err = -EINVAL; - goto err_put; - } - - /* Get the gpio number relative to the dynamic base. */ - gpio = of_get_gpio_flags(child_node, 0, &flags); - if (!gpio_is_valid(gpio)) { - err = -EINVAL; - goto err_put; - } + int err; - err = gpio_request(gpio, "gpio-halt"); + halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node), + NULL, 0, GPIOD_OUT_LOW, "gpio-halt"); + err = PTR_ERR_OR_ZERO(halt_gpio); if (err) { - printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n", - gpio); - goto err_put; + dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err); + return err; } - trigger = (flags == OF_GPIO_ACTIVE_LOW); - - gpio_direction_output(gpio, !trigger); - /* Now get the IRQ which tells us when the power button is hit */ - irq = irq_of_parse_and_map(child_node, 0); - err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, "gpio-halt", child_node); + halt_irq = irq_of_parse_and_map(halt_node, 0); + err = request_irq(halt_irq, gpio_halt_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "gpio-halt", pdev); if (err) { - printk(KERN_ERR "gpio-halt: error requesting IRQ %d for " - "GPIO %d.\n", irq, gpio); - gpio_free(gpio); - goto err_put; + dev_err(&pdev->dev, "failed to request IRQ %d: %d\n", + halt_irq, err); + gpiod_put(halt_gpio); + halt_gpio = NULL; + return err; } /* Register our halt function */ ppc_md.halt = gpio_halt_cb; pm_power_off = gpio_halt_cb; - printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" - " irq).\n", gpio, trigger, irq); + dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq); - halt_node = child_node; return 0; - -err_put: - of_node_put(child_node); - return err; } -static int gpio_halt_remove(struct platform_device *pdev) +static int gpio_halt_probe(struct platform_device *pdev) { - if (halt_node) { - int gpio = of_get_gpio(halt_node, 0); - int irq = irq_of_parse_and_map(halt_node, 0); + struct device_node *halt_node; + int ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + /* If there's no matching child, this isn't really an error */ + halt_node = of_find_matching_node(pdev->dev.of_node, child_match); + if (!halt_node) + return -ENODEV; + + ret = __gpio_halt_probe(pdev, halt_node); + of_node_put(halt_node); - free_irq(irq, halt_node); + return ret; +} - ppc_md.halt = NULL; - pm_power_off = NULL; +static int gpio_halt_remove(struct platform_device *pdev) +{ + free_irq(halt_irq, pdev); + cancel_work_sync(&gpio_halt_wq); - gpio_free(gpio); + ppc_md.halt = NULL; + pm_power_off = NULL; - of_node_put(halt_node); - halt_node = NULL; - } + gpiod_put(halt_gpio); + halt_gpio = NULL; return 0; } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 0c4eed9aea80..9563336e3348 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -135,6 +135,7 @@ config GENERIC_CPU depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWERPC_CPU bool "Generic 32 bits powerpc" @@ -160,17 +161,20 @@ config POWER7_CPU depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWER8_CPU bool "POWER8" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX config POWER9_CPU bool "POWER9" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_HAS_LBARX_LHARX config POWER10_CPU bool "POWER10" @@ -184,6 +188,7 @@ config E5500_CPU config E6500_CPU bool "Freescale e6500" depends on PPC64 && PPC_E500 + select PPC_HAS_LBARX_LHARX config 405_CPU bool "40x family" @@ -575,10 +580,10 @@ config CPU_LITTLE_ENDIAN endchoice config PPC64_ELF_ABI_V1 - def_bool PPC64 && CPU_BIG_ENDIAN + def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2) config PPC64_ELF_ABI_V2 - def_bool PPC64 && CPU_LITTLE_ENDIAN + def_bool PPC64 && !PPC64_ELF_ABI_V1 config PPC64_BOOT_WRAPPER def_bool n diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index bf300167ad6b..913b77b92cea 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -294,7 +294,7 @@ static struct platform_driver gpio_mdio_driver = }, }; -static int gpio_mdio_init(void) +static int __init gpio_mdio_init(void) { struct device_node *np; @@ -314,7 +314,7 @@ static int gpio_mdio_init(void) } module_init(gpio_mdio_init); -static void gpio_mdio_exit(void) +static void __exit gpio_mdio_exit(void) { platform_driver_unregister(&gpio_mdio_driver); if (gpio_regs) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 04daa7f0a03c..4f7ee885a78f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -70,9 +70,7 @@ #undef SHOW_GATWICK_IRQS -int ppc_override_l2cr = 0; -int ppc_override_l2cr_value; -int has_l2cache = 0; +static int has_l2cache; int pmac_newworld; @@ -236,22 +234,16 @@ static void __init l2cr_init(void) const unsigned int *l2cr = of_get_property(np, "l2cr-value", NULL); if (l2cr) { - ppc_override_l2cr = 1; - ppc_override_l2cr_value = *l2cr; _set_L2CR(0); - _set_L2CR(ppc_override_l2cr_value); + _set_L2CR(*l2cr); + pr_info("L2CR overridden (0x%x), backside cache is %s\n", + *l2cr, ((*l2cr) & 0x80000000) ? + "enabled" : "disabled"); } of_node_put(np); break; } } - - if (ppc_override_l2cr) - printk(KERN_INFO "L2CR overridden (0x%x), " - "backside cache is %s\n", - ppc_override_l2cr_value, - (ppc_override_l2cr_value & 0x80000000) - ? "enabled" : "disabled"); } #endif diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 2502e9b17df4..38a7e02295c8 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -466,7 +466,7 @@ static struct attribute *ps3_system_bus_dev_attrs[] = { }; ATTRIBUTE_GROUPS(ps3_system_bus_dev); -struct bus_type ps3_system_bus_type = { +static struct bus_type ps3_system_bus_type = { .name = "ps3_system_bus", .match = ps3_system_bus_match, .uevent = ps3_system_bus_uevent, diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 8e40ccac0f44..6b507b62ce8f 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -154,7 +154,7 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) /** * pseries_eeh_phb_reset - Reset the specified PHB * @phb: PCI controller - * @config_adddr: the associated config address + * @config_addr: the associated config address * @option: reset option * * Reset the specified PHB/PE @@ -188,7 +188,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in /** * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE * @phb: PCI controller - * @config_adddr: the associated config address + * @config_addr: the associated config address * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered @@ -848,16 +848,7 @@ static int __init eeh_pseries_init(void) } /* Initialize error log size */ - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_info("%s: unknown EEH error log size\n", - __func__); - eeh_error_buf_size = 1024; - } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_info("%s: EEH error log size %d exceeds the maximal %d\n", - __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } + eeh_error_buf_size = rtas_get_error_log_max(); /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index e0a7ac5db15d..090ae5a1e0f5 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -70,6 +70,7 @@ static void pseries_cpu_offline_self(void) xics_teardown_cpu(); unregister_slb_shadow(hwcpu); + unregister_vpa(hwcpu); rtas_stop_self(); /* Should never get here... */ diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 762eb15d3bd4..783c16ad648b 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -27,7 +27,9 @@ hcall_tracepoint_refcount: /* * precall must preserve all registers. use unused STK_PARAM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. STK_PARAM() in the caller's + * frame will be available even on ELFv2 because these are all + * variadic functions. */ #define HCALL_INST_PRECALL(FIRST_REG) \ mflr r0; \ @@ -41,29 +43,29 @@ hcall_tracepoint_refcount: std r10,STK_PARAM(R10)(r1); \ std r0,16(r1); \ addi r4,r1,STK_PARAM(FIRST_REG); \ - stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + stdu r1,-STACK_FRAME_MIN_SIZE(r1); \ bl __trace_hcall_entry; \ - ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ - ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ - ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ - ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ - ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ - ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ - ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ - ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) + ld r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1) /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. */ #define __HCALL_INST_POSTCALL \ - ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ - std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ mr r4,r3; \ mr r3,r0; \ bl __trace_hcall_exit; \ - ld r0,STACK_FRAME_OVERHEAD+16(r1); \ - addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,STACK_FRAME_MIN_SIZE+16(r1); \ + addi r1,r1,STACK_FRAME_MIN_SIZE; \ ld r3,STK_PARAM(R3)(r1); \ mtlr r0 @@ -303,14 +305,14 @@ plpar_hcall9_trace: mr r7,r8 mr r8,r9 mr r9,r10 - ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) - ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) - ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1) HVSC mr r0,r12 - ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) std r4,0(r12) std r5,8(r12) std r6,16(r12) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 561adac69022..c74b71d4733d 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -248,7 +248,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, * Set up the page with TCE data, looping through and setting * the values. */ - limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); + limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 634fac5db3f9..4cea71aa0f41 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -635,10 +635,13 @@ retry: prod_others(); } /* - * Execution may have been suspended for several seconds, so - * reset the watchdog. + * Execution may have been suspended for several seconds, so reset + * the watchdogs. touch_nmi_watchdog() also touches the soft lockup + * watchdog. */ + rcu_cpu_stall_reset(); touch_nmi_watchdog(); + return ret; } diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index f4b5b5a64db3..4edd1585e245 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -75,7 +75,7 @@ static int pseries_status_to_err(int rc) case H_FUNCTION: err = -ENXIO; break; - case H_P1: + case H_PARAMETER: case H_P2: case H_P3: case H_P4: @@ -111,7 +111,7 @@ static int pseries_status_to_err(int rc) err = -EEXIST; break; case H_ABORTED: - err = -EINTR; + err = -EIO; break; default: err = -EINVAL; @@ -162,19 +162,15 @@ static struct plpks_auth *construct_auth(u8 consumer) if (consumer > PKS_OS_OWNER) return ERR_PTR(-EINVAL); - auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); + auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); if (!auth) return ERR_PTR(-ENOMEM); auth->version = 1; auth->consumer = consumer; - auth->rsvd0 = 0; - auth->rsvd1 = 0; - if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) { - auth->passwordlength = 0; + if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) return auth; - } memcpy(auth->password, ospassword, ospasswordlength); @@ -312,10 +308,6 @@ int plpks_write_var(struct plpks_var var) if (!rc) rc = plpks_confirm_object_flushed(label, auth); - if (rc) - pr_err("Failed to write variable %s for component %s with error %d\n", - var.name, var.component, rc); - rc = pseries_status_to_err(rc); kfree(label); out: @@ -350,10 +342,6 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) if (!rc) rc = plpks_confirm_object_flushed(label, auth); - if (rc) - pr_err("Failed to remove variable %s for component %s with error %d\n", - vname.name, component, rc); - rc = pseries_status_to_err(rc); kfree(label); out: @@ -366,22 +354,24 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; struct plpks_auth *auth; - struct label *label; + struct label *label = NULL; u8 *output; int rc; if (var->namelen > MAX_NAME_SIZE) return -EINVAL; - auth = construct_auth(PKS_OS_OWNER); + auth = construct_auth(consumer); if (IS_ERR(auth)) return PTR_ERR(auth); - label = construct_label(var->component, var->os, var->name, - var->namelen); - if (IS_ERR(label)) { - rc = PTR_ERR(label); - goto out_free_auth; + if (consumer == PKS_OS_OWNER) { + label = construct_label(var->component, var->os, var->name, + var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out_free_auth; + } } output = kzalloc(maxobjsize, GFP_KERNEL); @@ -390,13 +380,17 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) goto out_free_label; } - rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), - virt_to_phys(label), label->size, virt_to_phys(output), - maxobjsize); + if (consumer == PKS_OS_OWNER) + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, virt_to_phys(output), + maxobjsize); + else + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(var->name), var->namelen, virt_to_phys(output), + maxobjsize); + if (rc != H_SUCCESS) { - pr_err("Failed to read variable %s for component %s with error %d\n", - var->name, var->component, rc); rc = pseries_status_to_err(rc); goto out_free_output; } diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h index c6a291367bb1..275ccd86bfb5 100644 --- a/arch/powerpc/platforms/pseries/plpks.h +++ b/arch/powerpc/platforms/pseries/plpks.h @@ -17,7 +17,7 @@ #define WORLDREADABLE 0x08000000 #define SIGNEDUPDATE 0x01000000 -#define PLPKS_VAR_LINUX 0x01 +#define PLPKS_VAR_LINUX 0x02 #define PLPKS_VAR_COMMON 0x04 struct plpks_var { diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 974d3db6faab..b7232c46b244 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1139,6 +1139,19 @@ void __init fsl_pci_assign_primary(void) } /* + * If there's no PCI host bridge with ISA then check for + * PCI host bridge with alias "pci0" (first PCI host bridge). + */ + np = of_find_node_by_path("pci0"); + if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) { + fsl_pci_primary = np; + of_node_put(np); + return; + } + if (np) + of_node_put(np); + + /* * If there's no PCI host bridge with ISA, arbitrarily * designate one as primary. This can go away once * various bugs with primary-less systems are fixed. diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index a439e33eae06..d75064fb7d12 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -20,7 +20,7 @@ #define MPIC_MSGR_REGISTERS_PER_BLOCK 4 #define MPIC_MSGR_STRIDE 0x10 -#define MPIC_MSGR_MER_OFFSET 0x100 +#define MPIC_MSGR_MER_OFFSET (0x100 / sizeof(u32)) #define MSGR_INUSE 0 #define MSGR_FREE 1 @@ -234,7 +234,7 @@ static int mpic_msgr_probe(struct platform_device *dev) reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i; msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE; - msgr->mer = (u32 *)((u8 *)msgr->base + MPIC_MSGR_MER_OFFSET); + msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET; msgr->in_use = MSGR_FREE; msgr->num = i; raw_spin_lock_init(&msgr->lock); diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 3925825954bc..19d880ebc5e6 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -535,13 +535,13 @@ static bool __init xive_parse_provisioning(struct device_node *np) static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ - pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); + pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids); xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids); if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP)) - pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n"); + pr_err("Failed to allocate pool VP, KVM might not function\n"); - pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n", + pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n", xive_pool_vps, nr_cpu_ids); } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index e2c8f93b535b..e45419264391 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -439,6 +439,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); if (!data->trig_mmio) { + iounmap(data->eoi_mmio); pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); return -ENOMEM; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f51c882bf902..0da66bc4823d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1525,9 +1525,9 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { - static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; - int mode; - case 'd': /* bd - hardware data breakpoint */ + case 'd': { /* bd - hardware data breakpoint */ + static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; + int mode; if (xmon_is_ro) { printf(xmon_ro_msg); break; @@ -1560,6 +1560,7 @@ bpt_cmds(void) force_enable_xmon(); break; + } case 'i': /* bi - hardware instr breakpoint */ if (xmon_is_ro) { @@ -1720,7 +1721,6 @@ static void get_function_bounds(unsigned long pc, unsigned long *startp, } #define LRSAVE_OFFSET (STACK_FRAME_LR_SAVE * sizeof(unsigned long)) -#define MARKER_OFFSET (STACK_FRAME_MARKER * sizeof(unsigned long)) static void xmon_show_stack(unsigned long sp, unsigned long lr, unsigned long pc) @@ -1781,14 +1781,13 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, xmon_print_symbol(ip, " ", "\n"); } - /* Look for "regshere" marker to see if this is + /* Look for "regs" marker to see if this is an exception frame. */ - if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long)) + if (mread(sp + STACK_INT_FRAME_MARKER, &marker, sizeof(unsigned long)) && marker == STACK_FRAME_REGS_MARKER) { - if (mread(sp + STACK_FRAME_OVERHEAD, ®s, sizeof(regs)) - != sizeof(regs)) { + if (mread(sp + STACK_INT_FRAME_REGS, ®s, sizeof(regs)) != sizeof(regs)) { printf("Couldn't read registers at %lx\n", - sp + STACK_FRAME_OVERHEAD); + sp + STACK_INT_FRAME_REGS); break; } printf("--- Exception: %lx %s at ", regs.trap, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index edeee3109a30..3604074a878b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -197,6 +197,7 @@ config X86 select HAVE_CONTEXT_TRACKING_USER_OFFSTACK if HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL + select HAVE_OBJTOOL_NOP_MCOUNT if HAVE_OBJTOOL_MCOUNT select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS |