summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-13 16:39:21 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-13 16:39:21 -0800
commitf4000cd99750065d5177555c0a805c97174d1b9f (patch)
tree88ab9f09e8fe1e97f34553f7964ee4598e7a0bfc
parent2ec4584eb89b8933d1ee307f2fc9c42e745847d7 (diff)
parent75037120e62b58c536999eb23d70cfcb6d6c0bcc (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - struct thread_info moved off-stack (also touching include/linux/thread_info.h and include/linux/restart_block.h) - cpus_have_cap() reworked to avoid __builtin_constant_p() for static key use (also touching drivers/irqchip/irq-gic-v3.c) - uprobes support (currently only for native 64-bit tasks) - Emulation of kernel Privileged Access Never (PAN) using TTBR0_EL1 switching to a reserved page table - CPU capacity information passing via DT or sysfs (used by the scheduler) - support for systems without FP/SIMD (IOW, kernel avoids touching these registers; there is no soft-float ABI, nor kernel emulation for AArch64 FP/SIMD) - handling of hardware watchpoint with unaligned addresses, varied lengths and offsets from base - use of the page table contiguous hint for kernel mappings - hugetlb fixes for sizes involving the contiguous hint - remove unnecessary I-cache invalidation in flush_cache_range() - CNTHCTL_EL2 access fix for CPUs with VHE support (ARMv8.1) - boot-time checks for writable+executable kernel mappings - simplify asm/opcodes.h and avoid including the 32-bit ARM counterpart and make the arm64 kernel headers self-consistent (Xen headers patch merged separately) - Workaround for broken .inst support in certain binutils versions * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (60 commits) arm64: Disable PAN on uaccess_enable() arm64: Work around broken .inst when defective gas is detected arm64: Add detection code for broken .inst support in binutils arm64: Remove reference to asm/opcodes.h arm64: Get rid of asm/opcodes.h arm64: smp: Prevent raw_smp_processor_id() recursion arm64: head.S: Fix CNTHCTL_EL2 access on VHE system arm64: Remove I-cache invalidation from flush_cache_range() arm64: Enable HIBERNATION in defconfig arm64: Enable CONFIG_ARM64_SW_TTBR0_PAN arm64: xen: Enable user access before a privcmd hvc call arm64: Handle faults caused by inadvertent user access with PAN enabled arm64: Disable TTBR0_EL1 during normal kernel execution arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1 arm64: Factor out TTBR0_EL1 post-update workaround into a specific asm macro arm64: Factor out PAN enabling/disabling into separate uaccess_* macros arm64: Update the synchronous external abort fault description selftests: arm64: add test for unaligned/inexact watchpoint handling arm64: Allow hw watchpoint of length 3,5,6 and 7 arm64: hw_breakpoint: Handle inexact watchpoint addresses ...
-rw-r--r--Documentation/devicetree/bindings/arm/cpu-capacity.txt236
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.txt10
-rw-r--r--arch/arm64/Kconfig12
-rw-r--r--arch/arm64/Kconfig.debug35
-rw-r--r--arch/arm64/Makefile10
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/assembler.h48
-rw-r--r--arch/arm64/include/asm/cacheflush.h7
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h30
-rw-r--r--arch/arm64/include/asm/current.h22
-rw-r--r--arch/arm64/include/asm/debug-monitors.h3
-rw-r--r--arch/arm64/include/asm/efi.h26
-rw-r--r--arch/arm64/include/asm/elf.h12
-rw-r--r--arch/arm64/include/asm/futex.h17
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h6
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h7
-rw-r--r--arch/arm64/include/asm/mmu.h3
-rw-r--r--arch/arm64/include/asm/mmu_context.h53
-rw-r--r--arch/arm64/include/asm/neon.h3
-rw-r--r--arch/arm64/include/asm/opcodes.h5
-rw-r--r--arch/arm64/include/asm/percpu.h18
-rw-r--r--arch/arm64/include/asm/perf_event.h2
-rw-r--r--arch/arm64/include/asm/probes.h21
-rw-r--r--arch/arm64/include/asm/ptdump.h22
-rw-r--r--arch/arm64/include/asm/ptrace.h8
-rw-r--r--arch/arm64/include/asm/smp.h14
-rw-r--r--arch/arm64/include/asm/stack_pointer.h9
-rw-r--r--arch/arm64/include/asm/suspend.h2
-rw-r--r--arch/arm64/include/asm/sysreg.h45
-rw-r--r--arch/arm64/include/asm/thread_info.h40
-rw-r--r--arch/arm64/include/asm/uaccess.h175
-rw-r--r--arch/arm64/include/asm/uprobes.h36
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c16
-rw-r--r--arch/arm64/kernel/asm-offsets.c13
-rw-r--r--arch/arm64/kernel/cpufeature.c18
-rw-r--r--arch/arm64/kernel/debug-monitors.c40
-rw-r--r--arch/arm64/kernel/efi.c8
-rw-r--r--arch/arm64/kernel/entry.S110
-rw-r--r--arch/arm64/kernel/fpsimd.c14
-rw-r--r--arch/arm64/kernel/head.S30
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c153
-rw-r--r--arch/arm64/kernel/insn.c1
-rw-r--r--arch/arm64/kernel/kgdb.c3
-rw-r--r--arch/arm64/kernel/probes/Makefile2
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c33
-rw-r--r--arch/arm64/kernel/probes/decode-insn.h8
-rw-r--r--arch/arm64/kernel/probes/kprobes.c36
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c16
-rw-r--r--arch/arm64/kernel/probes/uprobes.c216
-rw-r--r--arch/arm64/kernel/process.c38
-rw-r--r--arch/arm64/kernel/ptrace.c7
-rw-r--r--arch/arm64/kernel/return_address.c1
-rw-r--r--arch/arm64/kernel/setup.c9
-rw-r--r--arch/arm64/kernel/signal.c3
-rw-r--r--arch/arm64/kernel/sleep.S3
-rw-r--r--arch/arm64/kernel/smp.c14
-rw-r--r--arch/arm64/kernel/stacktrace.c7
-rw-r--r--arch/arm64/kernel/suspend.c6
-rw-r--r--arch/arm64/kernel/topology.c223
-rw-r--r--arch/arm64/kernel/traps.c28
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S5
-rw-r--r--arch/arm64/kvm/handle_exit.c11
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S9
-rw-r--r--arch/arm64/kvm/hyp/switch.c5
-rw-r--r--arch/arm64/lib/clear_user.S11
-rw-r--r--arch/arm64/lib/copy_from_user.S11
-rw-r--r--arch/arm64/lib/copy_in_user.S11
-rw-r--r--arch/arm64/lib/copy_to_user.S11
-rw-r--r--arch/arm64/mm/Makefile3
-rw-r--r--arch/arm64/mm/cache.S6
-rw-r--r--arch/arm64/mm/context.c7
-rw-r--r--arch/arm64/mm/dma-mapping.c5
-rw-r--r--arch/arm64/mm/dump.c106
-rw-r--r--arch/arm64/mm/fault.c22
-rw-r--r--arch/arm64/mm/flush.c9
-rw-r--r--arch/arm64/mm/hugetlbpage.c22
-rw-r--r--arch/arm64/mm/mmu.c137
-rw-r--r--arch/arm64/mm/proc.S12
-rw-r--r--arch/arm64/mm/ptdump_debugfs.c31
-rw-r--r--arch/arm64/xen/hypercall.S15
-rw-r--r--drivers/firmware/efi/arm-runtime.c4
-rw-r--r--drivers/irqchip/irq-gic-v3.c13
-rw-r--r--include/linux/restart_block.h51
-rw-r--r--include/linux/thread_info.h45
-rw-r--r--include/uapi/linux/hw_breakpoint.h4
-rw-r--r--tools/include/uapi/linux/hw_breakpoint.h4
-rw-r--r--tools/testing/selftests/breakpoints/Makefile5
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c236
90 files changed, 2284 insertions, 525 deletions
diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
new file mode 100644
index 000000000000..7809fbe0cdb7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
@@ -0,0 +1,236 @@
+==========================================
+ARM CPUs capacity bindings
+==========================================
+
+==========================================
+1 - Introduction
+==========================================
+
+ARM systems may be configured to have cpus with different power/performance
+characteristics within the same chip. In this case, additional information has
+to be made available to the kernel for it to be aware of such differences and
+take decisions accordingly.
+
+==========================================
+2 - CPU capacity definition
+==========================================
+
+CPU capacity is a number that provides the scheduler information about CPUs
+heterogeneity. Such heterogeneity can come from micro-architectural differences
+(e.g., ARM big.LITTLE systems) or maximum frequency at which CPUs can run
+(e.g., SMP systems with multiple frequency domains). Heterogeneity in this
+context is about differing performance characteristics; this binding tries to
+capture a first-order approximation of the relative performance of CPUs.
+
+CPU capacities are obtained by running a suitable benchmark. This binding makes
+no guarantees on the validity or suitability of any particular benchmark, the
+final capacity should, however, be:
+
+* A "single-threaded" or CPU affine benchmark
+* Divided by the running frequency of the CPU executing the benchmark
+* Not subject to dynamic frequency scaling of the CPU
+
+For the time being we however advise usage of the Dhrystone benchmark. What
+above thus becomes:
+
+CPU capacities are obtained by running the Dhrystone benchmark on each CPU at
+max frequency (with caches enabled). The obtained DMIPS score is then divided
+by the frequency (in MHz) at which the benchmark has been run, so that
+DMIPS/MHz are obtained. Such values are then normalized w.r.t. the highest
+score obtained in the system.
+
+==========================================
+3 - capacity-dmips-mhz
+==========================================
+
+capacity-dmips-mhz is an optional cpu node [1] property: u32 value
+representing CPU capacity expressed in normalized DMIPS/MHz. At boot time, the
+maximum frequency available to the cpu is then used to calculate the capacity
+value internally used by the kernel.
+
+capacity-dmips-mhz property is all-or-nothing: if it is specified for a cpu
+node, it has to be specified for every other cpu nodes, or the system will
+fall back to the default capacity value for every CPU. If cpufreq is not
+available, final capacities are calculated by directly using capacity-dmips-
+mhz values (normalized w.r.t. the highest value found while parsing the DT).
+
+===========================================
+4 - Examples
+===========================================
+
+Example 1 (ARM 64-bit, 6-cpu system, two clusters):
+capacities-dmips-mhz are scaled w.r.t. 1024 (cpu@0 and cpu@1)
+supposing cluster0@max-freq=1100 and custer1@max-freq=850,
+final capacities are 1024 for cluster0 and 446 for cluster1
+
+cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&A57_0>;
+ };
+ core1 {
+ cpu = <&A57_1>;
+ };
+ };
+
+ cluster1 {
+ core0 {
+ cpu = <&A53_0>;
+ };
+ core1 {
+ cpu = <&A53_1>;
+ };
+ core2 {
+ cpu = <&A53_2>;
+ };
+ core3 {
+ cpu = <&A53_3>;
+ };
+ };
+ };
+
+ idle-states {
+ entry-method = "arm,psci";
+
+ CPU_SLEEP_0: cpu-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ local-timer-stop;
+ entry-latency-us = <100>;
+ exit-latency-us = <250>;
+ min-residency-us = <150>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x1010000>;
+ local-timer-stop;
+ entry-latency-us = <800>;
+ exit-latency-us = <700>;
+ min-residency-us = <2500>;
+ };
+ };
+
+ A57_0: cpu@0 {
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x0>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A57_L2>;
+ clocks = <&scpi_dvfs 0>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+ A57_1: cpu@1 {
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x1>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A57_L2>;
+ clocks = <&scpi_dvfs 0>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+ A53_0: cpu@100 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x100>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A53_1: cpu@101 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x101>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A53_2: cpu@102 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x102>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A53_3: cpu@103 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x103>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A57_L2: l2-cache0 {
+ compatible = "cache";
+ };
+
+ A53_L2: l2-cache1 {
+ compatible = "cache";
+ };
+};
+
+Example 2 (ARM 32-bit, 4-cpu system, two clusters,
+ cpus 0,1@1GHz, cpus 2,3@500MHz):
+capacities-dmips-mhz are scaled w.r.t. 2 (cpu@0 and cpu@1), this means that first
+cpu@0 and cpu@1 are twice fast than cpu@2 and cpu@3 (at the same frequency)
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ capacity-dmips-mhz = <2>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ capacity-dmips-mhz = <2>;
+ };
+
+ cpu2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x100>;
+ capacity-dmips-mhz = <1>;
+ };
+
+ cpu3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x101>;
+ capacity-dmips-mhz = <1>;
+ };
+};
+
+===========================================
+5 - References
+===========================================
+
+[1] ARM Linux Kernel documentation - CPUs bindings
+ Documentation/devicetree/bindings/arm/cpus.txt
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index e6782d50cbcd..c1dcf4cade2e 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -241,6 +241,14 @@ nodes to be present and contain the properties described below.
# List of phandles to idle state nodes supported
by this cpu [3].
+ - capacity-dmips-mhz
+ Usage: Optional
+ Value type: <u32>
+ Definition:
+ # u32 value representing CPU capacity [3] in
+ DMIPS/MHz, relative to highest capacity-dmips-mhz
+ in the system.
+
- rockchip,pmu
Usage: optional for systems that have an "enable-method"
property value of "rockchip,rk3066-smp"
@@ -464,3 +472,5 @@ cpus {
[2] arm/msm/qcom,kpss-acc.txt
[3] ARM Linux kernel documentation - idle states bindings
Documentation/devicetree/bindings/arm/idle-states.txt
+[3] ARM Linux kernel documentation - cpu capacity bindings
+ Documentation/devicetree/bindings/arm/cpu-capacity.txt
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 657be7f5014e..111742126897 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -110,6 +110,7 @@ config ARM64
select POWER_SUPPLY
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
help
ARM 64-bit (AArch64) Linux support.
@@ -239,6 +240,9 @@ config PGTABLE_LEVELS
default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -791,6 +795,14 @@ config SETEND_EMULATION
If unsure, say Y
endif
+config ARM64_SW_TTBR0_PAN
+ bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
+ help
+ Enabling this option prevents the kernel from accessing
+ user-space memory directly by pointing TTBR0_EL1 to a reserved
+ zeroed area and reserved ASID. The user access routines
+ restore the valid TTBR0_EL1 temporarily.
+
menu "ARMv8.1 architectural features"
config ARM64_HW_AFDBM
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index b661fe742615..d1ebd46872fd 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -2,9 +2,13 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config ARM64_PTDUMP
+config ARM64_PTDUMP_CORE
+ def_bool n
+
+config ARM64_PTDUMP_DEBUGFS
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
+ select ARM64_PTDUMP_CORE
select DEBUG_FS
help
Say Y here if you want to show the kernel pagetable layout in a
@@ -38,6 +42,35 @@ config ARM64_RANDOMIZE_TEXT_OFFSET
of TEXT_OFFSET and platforms must not require a specific
value.
+config DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ select ARM64_PTDUMP_CORE
+ ---help---
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+ This check also includes UXN, which should be set on all kernel
+ mappings.
+
+ Look for a message in dmesg output like this:
+
+ arm64/mm: Checked W+X mappings: passed, no W+X pages found.
+
+ or like this, if the check failed:
+
+ arm64/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 3635b8662724..b9a4a934ca05 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,10 +37,16 @@ $(warning LSE atomics not supported by binutils)
endif
endif
-KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr)
+brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
+
+ifneq ($(brokengasinst),)
+$(warning Detected assembler with broken .inst; disassembly will be unreliable)
+endif
+
+KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads)
-KBUILD_AFLAGS += $(lseinstr)
+KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 6be08113a96d..c3caaddde6cc 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -82,6 +82,7 @@ CONFIG_KEXEC=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
CONFIG_CPU_IDLE=y
+CONFIG_HIBERNATION=y
CONFIG_ARM_CPUIDLE=y
CONFIG_CPU_FREQ=y
CONFIG_CPUFREQ_DT=y
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index b4ab238a59ec..8365a84c2640 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
-generic-y += current.h
generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 28bfe6132eb6..446f6c46d4b1 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -41,6 +41,15 @@
msr daifclr, #2
.endm
+ .macro save_and_disable_irq, flags
+ mrs \flags, daif
+ msr daifset, #2
+ .endm
+
+ .macro restore_irq, flags
+ msr daif, \flags
+ .endm
+
/*
* Enable and disable debug exceptions.
*/
@@ -202,14 +211,25 @@ lr .req x30 // link register
.endm
/*
+ * @dst: Result of per_cpu(sym, smp_processor_id())
* @sym: The name of the per-cpu variable
- * @reg: Result of per_cpu(sym, smp_processor_id())
* @tmp: scratch register
*/
- .macro this_cpu_ptr, sym, reg, tmp
- adr_l \reg, \sym
+ .macro adr_this_cpu, dst, sym, tmp
+ adr_l \dst, \sym
mrs \tmp, tpidr_el1
- add \reg, \reg, \tmp
+ add \dst, \dst, \tmp
+ .endm
+
+ /*
+ * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
+ * @sym: The name of the per-cpu variable
+ * @tmp: scratch register
+ */
+ .macro ldr_this_cpu dst, sym, tmp
+ adr_l \dst, \sym
+ mrs \tmp, tpidr_el1
+ ldr \dst, [\dst, \tmp]
.endm
/*
@@ -395,4 +415,24 @@ alternative_endif
movk \reg, :abs_g0_nc:\val
.endm
+/*
+ * Return the current thread_info.
+ */
+ .macro get_thread_info, rd
+ mrs \rd, sp_el0
+ .endm
+
+/*
+ * Errata workaround post TTBR0_EL1 update.
+ */
+ .macro post_ttbr0_update_workaround
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+alternative_if ARM64_WORKAROUND_CAVIUM_27456
+ ic iallu
+ dsb nsh
+ isb
+alternative_else_nop_endif
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 2e5fb976a572..5a2a6ee65f65 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -65,12 +65,12 @@
* - kaddr - page address
* - size - region size
*/
-extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
extern void __clean_dcache_area_poc(void *addr, size_t len);
extern void __clean_dcache_area_pou(void *addr, size_t len);
extern long __flush_cache_user_range(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(void *kaddr, unsigned long len);
static inline void flush_cache_mm(struct mm_struct *mm)
{
@@ -81,6 +81,11 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
{
}
+static inline void flush_cache_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
/*
* Cache maintenance functions used by the DMA API. No to be used directly.
*/
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 87b446535185..4174f09678c4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -34,7 +34,8 @@
#define ARM64_HAS_32BIT_EL0 13
#define ARM64_HYP_OFFSET_LOW 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
+#define ARM64_HAS_NO_FPSIMD 16
-#define ARM64_NCAPS 16
+#define ARM64_NCAPS 17
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 0bc0b1de90c4..b4989df48670 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -9,8 +9,6 @@
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
-#include <linux/jump_label.h>
-
#include <asm/cpucaps.h>
#include <asm/hwcap.h>
#include <asm/sysreg.h>
@@ -27,6 +25,8 @@
#ifndef __ASSEMBLY__
+#include <linux/bug.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
/* CPU feature register tracking */
@@ -104,14 +104,19 @@ static inline bool cpu_have_feature(unsigned int num)
return elf_hwcap & (1UL << num);
}
+/* System capability check for constant caps */
+static inline bool cpus_have_const_cap(int num)
+{
+ if (num >= ARM64_NCAPS)
+ return false;
+ return static_branch_unlikely(&cpu_hwcap_keys[num]);
+}
+
static inline bool cpus_have_cap(unsigned int num)
{
if (num >= ARM64_NCAPS)
return false;
- if (__builtin_constant_p(num))
- return static_branch_unlikely(&cpu_hwcap_keys[num]);
- else
- return test_bit(num, cpu_hwcaps);
+ return test_bit(num, cpu_hwcaps);
}
static inline void cpus_set_cap(unsigned int num)
@@ -200,7 +205,7 @@ static inline bool cpu_supports_mixed_endian_el0(void)
static inline bool system_supports_32bit_el0(void)
{
- return cpus_have_cap(ARM64_HAS_32BIT_EL0);
+ return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
}
static inline bool system_supports_mixed_endian_el0(void)
@@ -208,6 +213,17 @@ static inline bool system_supports_mixed_endian_el0(void)
return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
}
+static inline bool system_supports_fpsimd(void)
+{
+ return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
+}
+
+static inline bool system_uses_ttbr0_pan(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
+ !cpus_have_cap(ARM64_HAS_PAN);
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
new file mode 100644
index 000000000000..f2bcbe2d9889
--- /dev/null
+++ b/arch/arm64/include/asm/current.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+static __always_inline struct task_struct *get_current(void)
+{
+ return (struct task_struct *)read_sysreg(sp_el0);
+}
+
+#define current get_current()
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_CURRENT_H */
+
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index b71420a12f26..a44cf5225429 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -68,6 +68,9 @@
#define BRK64_ESR_MASK 0xFFFF
#define BRK64_ESR_KPROBES 0x0004
#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5))
+/* uprobes BRK opcodes with ESR encoding */
+#define BRK64_ESR_UPROBES 0x0005
+#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (BRK64_ESR_UPROBES << 5))
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 771b3f0bc757..0b6b1633017f 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -1,6 +1,7 @@
#ifndef _ASM_EFI_H
#define _ASM_EFI_H
+#include <asm/cpufeature.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/neon.h>
@@ -78,7 +79,30 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
static inline void efi_set_pgd(struct mm_struct *mm)
{
- switch_mm(NULL, mm, NULL);
+ __switch_mm(mm);
+
+ if (system_uses_ttbr0_pan()) {
+ if (mm != current->active_mm) {
+ /*
+ * Update the current thread's saved ttbr0 since it is
+ * restored as part of a return from exception. Set
+ * the hardware TTBR0_EL1 using cpu_switch_mm()
+ * directly to enable potential errata workarounds.
+ */
+ update_saved_ttbr0(current, mm);
+ cpu_switch_mm(mm->pgd, mm);
+ } else {
+ /*
+ * Defer the switch to the current thread's TTBR0_EL1
+ * until uaccess_enable(). Restore the current
+ * thread's saved ttbr0 corresponding to its active_mm
+ * (if different from init_mm).
+ */
+ cpu_set_reserved_ttbr0();
+ if (current->active_mm != &init_mm)
+ update_saved_ttbr0(current, current->active_mm);
+ }
+ }
}
void efi_virtmap_load(void);
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index a55384f4a5d7..5d1700425efe 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -138,7 +138,11 @@ typedef struct user_fpsimd_state elf_fpregset_t;
*/
#define ELF_PLAT_INIT(_r, load_addr) (_r)->regs[0] = 0
-#define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT);
+#define SET_PERSONALITY(ex) \
+({ \
+ clear_bit(TIF_32BIT, &current->mm->context.flags); \
+ clear_thread_flag(TIF_32BIT); \
+})
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
#define ARCH_DLINFO \
@@ -183,7 +187,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
((x)->e_flags & EF_ARM_EABI_MASK))
#define compat_start_thread compat_start_thread
-#define COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT);
+#define COMPAT_SET_PERSONALITY(ex) \
+({ \
+ set_bit(TIF_32BIT, &current->mm->context.flags); \
+ set_thread_flag(TIF_32BIT); \
+ })
#define COMPAT_ARCH_DLINFO
extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
int uses_interp);
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index f2585cdd32c2..85c4a8981d47 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -21,15 +21,12 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
-#include <asm/alternative.h>
-#include <asm/cpufeature.h>
#include <asm/errno.h>
-#include <asm/sysreg.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
+do { \
+ uaccess_enable(); \
asm volatile( \
- ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
insn "\n" \
@@ -44,11 +41,11 @@
" .popsection\n" \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
- ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
: "r" (oparg), "Ir" (-EFAULT) \
- : "memory")
+ : "memory"); \
+ uaccess_disable(); \
+} while (0)
static inline int
futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
@@ -118,8 +115,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
+ uaccess_enable();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
" sub %w3, %w1, %w4\n"
@@ -134,10 +131,10 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
" .popsection\n"
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 4b)
-ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
: "memory");
+ uaccess_disable();
*uval = val;
return ret;
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 9510ace570e2..b6b167ac082b 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -77,7 +77,11 @@ static inline void decode_ctrl_reg(u32 reg,
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3
+#define ARM_BREAKPOINT_LEN_3 0x7
#define ARM_BREAKPOINT_LEN_4 0xf
+#define ARM_BREAKPOINT_LEN_5 0x1f
+#define ARM_BREAKPOINT_LEN_6 0x3f
+#define ARM_BREAKPOINT_LEN_7 0x7f
#define ARM_BREAKPOINT_LEN_8 0xff
/* Kernel stepping */
@@ -119,7 +123,7 @@ struct perf_event;
struct pmu;
extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type);
+ int *gen_len, int *gen_type, int *offset);
extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7e51d1b57c0c..7803343e5881 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,7 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
+#include <asm/pgtable.h>
#include <asm/sparsemem.h>
/*
@@ -54,6 +55,12 @@
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+#define RESERVED_TTBR0_SIZE (PAGE_SIZE)
+#else
+#define RESERVED_TTBR0_SIZE (0)
+#endif
+
/* Initial memory map size */
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 8d9fce037b2f..47619411f0ff 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -19,6 +19,7 @@
typedef struct {
atomic64_t id;
void *vdso;
+ unsigned long flags;
} mm_context_t;
/*
@@ -34,7 +35,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
- pgprot_t prot, bool allow_block_mappings);
+ pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a50185375f09..0363fe80455c 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -23,6 +23,7 @@
#include <linux/sched.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/proc-fns.h>
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
@@ -103,7 +104,7 @@ static inline void cpu_uninstall_idmap(void)
local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
- if (mm != &init_mm)
+ if (mm != &init_mm && !system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
}
@@ -163,20 +164,26 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
}
-/*
- * This is the actual mm switch as far as the scheduler
- * is concerned. No registers are touched. We avoid
- * calling the CPU specific function when the mm hasn't
- * actually changed.
- */
-static inline void
-switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+ struct mm_struct *mm)
{
- unsigned int cpu = smp_processor_id();
+ if (system_uses_ttbr0_pan()) {
+ BUG_ON(mm->pgd == swapper_pg_dir);
+ task_thread_info(tsk)->ttbr0 =
+ virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ }
+}
+#else
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+}
+#endif
- if (prev == next)
- return;
+static inline void __switch_mm(struct mm_struct *next)
+{
+ unsigned int cpu = smp_processor_id();
/*
* init_mm.pgd does not contain any user mappings and it is always
@@ -190,8 +197,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
check_and_switch_context(next, cpu);
}
+static inline void
+switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (prev != next)
+ __switch_mm(next);
+
+ /*
+ * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
+ * value may have not been initialised yet (activate_mm caller) or the
+ * ASID has changed since the last run (following the context switch
+ * of another thread of the same process). Avoid setting the reserved
+ * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+ */
+ if (next != &init_mm)
+ update_saved_ttbr0(tsk, next);
+}
+
#define deactivate_mm(tsk,mm) do { } while (0)
-#define activate_mm(prev,next) switch_mm(prev, next, NULL)
+#define activate_mm(prev,next) switch_mm(prev, next, current)
void verify_cpu_asid_bits(void);
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index 13ce4cc18e26..ad4cdc966c0f 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -9,8 +9,9 @@
*/
#include <linux/types.h>
+#include <asm/fpsimd.h>
-#define cpu_has_neon() (1)
+#define cpu_has_neon() system_supports_fpsimd()
#define kernel_neon_begin() kernel_neon_begin_partial(32)
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
deleted file mode 100644
index 123f45d92cd1..000000000000
--- a/arch/arm64/include/asm/opcodes.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN
-#endif
-
-#include <../../arm/include/asm/opcodes.h>
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 5394c8405e66..3bd498e4de4c 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,6 +16,8 @@
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H
+#include <asm/stack_pointer.h>
+
static inline void set_my_cpu_offset(unsigned long off)
{
asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
@@ -101,16 +103,16 @@ static inline unsigned long __percpu_read(void *ptr, int size)
switch (size) {
case 1:
- ret = ACCESS_ONCE(*(u8 *)ptr);
+ ret = READ_ONCE(*(u8 *)ptr);
break;
case 2:
- ret = ACCESS_ONCE(*(u16 *)ptr);
+ ret = READ_ONCE(*(u16 *)ptr);
break;
case 4:
- ret = ACCESS_ONCE(*(u32 *)ptr);
+ ret = READ_ONCE(*(u32 *)ptr);
break;
case 8:
- ret = ACCESS_ONCE(*(u64 *)ptr);
+ ret = READ_ONCE(*(u64 *)ptr);
break;
default:
BUILD_BUG();
@@ -123,16 +125,16 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
- ACCESS_ONCE(*(u8 *)ptr) = (u8)val;
+ WRITE_ONCE(*(u8 *)ptr, (u8)val);
break;
case 2:
- ACCESS_ONCE(*(u16 *)ptr) = (u16)val;
+ WRITE_ONCE(*(u16 *)ptr, (u16)val);
break;
case 4:
- ACCESS_ONCE(*(u32 *)ptr) = (u32)val;
+ WRITE_ONCE(*(u32 *)ptr, (u32)val);
break;
case 8:
- ACCESS_ONCE(*(u64 *)ptr) = (u64)val;
+ WRITE_ONCE(*(u64 *)ptr, (u64)val);
break;
default:
BUILD_BUG();
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 38b6a2b49d68..8d5cbec17d80 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -17,6 +17,8 @@
#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H
+#include <asm/stack_pointer.h>
+
#define ARMV8_PMU_MAX_COUNTERS 32
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 5af574d632fa..6a5b28904c33 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -15,21 +15,22 @@
#ifndef _ARM_PROBES_H
#define _ARM_PROBES_H
-#include <asm/opcodes.h>
-
-struct kprobe;
-struct arch_specific_insn;
-
-typedef u32 kprobe_opcode_t;
-typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *);
+typedef u32 probe_opcode_t;
+typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
/* architecture specific copy of original instruction */
-struct arch_specific_insn {
- kprobe_opcode_t *insn;
+struct arch_probe_insn {
+ probe_opcode_t *insn;
pstate_check_t *pstate_cc;
- kprobes_handler_t *handler;
+ probes_handler_t *handler;
/* restore address after step xol */
unsigned long restore;
};
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+};
+#endif
#endif
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 07b8ed037dee..6afd8476c60c 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -16,9 +16,10 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
-#ifdef CONFIG_ARM64_PTDUMP
+#ifdef CONFIG_ARM64_PTDUMP_CORE
#include <linux/mm_types.h>
+#include <linux/seq_file.h>
struct addr_marker {
unsigned long start_address;
@@ -29,16 +30,25 @@ struct ptdump_info {
struct mm_struct *mm;
const struct addr_marker *markers;
unsigned long base_addr;
- unsigned long max_addr;
};
-int ptdump_register(struct ptdump_info *info, const char *name);
-
+void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
+#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
-static inline int ptdump_register(struct ptdump_info *info, const char *name)
+static inline int ptdump_debugfs_register(struct ptdump_info *info,
+ const char *name)
{
return 0;
}
-#endif /* CONFIG_ARM64_PTDUMP */
+#endif
+void ptdump_check_wx(void);
+#endif /* CONFIG_ARM64_PTDUMP_CORE */
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_check_wx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index ada08b5b036d..513daf050e84 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -217,6 +217,14 @@ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
#include <asm-generic/ptrace.h>
+#define procedure_link_pointer(regs) ((regs)->regs[30])
+
+static inline void procedure_link_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ procedure_link_pointer(regs) = val;
+}
+
#undef profile_pc
extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 022644704a93..d050d720a1b4 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -29,11 +29,22 @@
#ifndef __ASSEMBLY__
+#include <asm/percpu.h>
+
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
-#define raw_smp_processor_id() (current_thread_info()->cpu)
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
+
+/*
+ * We don't use this_cpu_read(cpu_number) as that has implicit writes to
+ * preempt_count, and associated (compiler) barriers, that we'd like to avoid
+ * the expense of. If we're preemptible, the value can be stale at use anyway.
+ * And we can't use this_cpu_ptr() either, as that winds up recursing back
+ * here under CONFIG_DEBUG_PREEMPT=y.
+ */
+#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
struct seq_file;
@@ -73,6 +84,7 @@ asmlinkage void secondary_start_kernel(void);
*/
struct secondary_data {
void *stack;
+ struct task_struct *task;
long status;
};
diff --git a/arch/arm64/include/asm/stack_pointer.h b/arch/arm64/include/asm/stack_pointer.h
new file mode 100644
index 000000000000..ffcdf742cddf
--- /dev/null
+++ b/arch/arm64/include/asm/stack_pointer.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_STACK_POINTER_H
+#define __ASM_STACK_POINTER_H
+
+/*
+ * how to get the current stack pointer from C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+#endif /* __ASM_STACK_POINTER_H */
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index b8a313fd7a09..de5600f40adf 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,7 +1,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
-#define NR_CTX_REGS 10
+#define NR_CTX_REGS 12
#define NR_CALLEE_SAVED_REGS 12
/*
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6c80b3699cb8..98ae03f8eedd 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,8 +22,6 @@
#include <linux/stringify.h>
-#include <asm/opcodes.h>
-
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -37,6 +35,33 @@
#define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+#ifndef CONFIG_BROKEN_GAS_INST
+
+#ifdef __ASSEMBLY__
+#define __emit_inst(x) .inst (x)
+#else
+#define __emit_inst(x) ".inst " __stringify((x)) "\n\t"
+#endif
+
+#else /* CONFIG_BROKEN_GAS_INST */
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+#define __INSTR_BSWAP(x) (x)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+#define __INSTR_BSWAP(x) ((((x) << 24) & 0xff000000) | \
+ (((x) << 8) & 0x00ff0000) | \
+ (((x) >> 8) & 0x0000ff00) | \
+ (((x) >> 24) & 0x000000ff))
+#endif /* CONFIG_CPU_BIG_ENDIAN */
+
+#ifdef __ASSEMBLY__
+#define __emit_inst(x) .long __INSTR_BSWAP(x)
+#else /* __ASSEMBLY__ */
+#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_BROKEN_GAS_INST */
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -81,10 +106,10 @@
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
-#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
- (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
- (!!x)<<8 | 0x1f)
+#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \
+ (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \
+ (!!x)<<8 | 0x1f)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_EE (1 << 25)
@@ -228,11 +253,11 @@
.equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
- .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt)
+ __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
.endm
.macro msr_s, sreg, rt
- .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt)
+ __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt))
.endm
#else
@@ -246,11 +271,11 @@ asm(
" .equ .L__reg_num_xzr, 31\n"
"\n"
" .macro mrs_s, rt, sreg\n"
-" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
+ __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt))
" .endm\n"
"\n"
" .macro msr_s, sreg, rt\n"
-" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
+ __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt))
" .endm\n"
);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e9ea5a6bd449..46c3b93cf865 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -36,58 +36,31 @@
struct task_struct;
+#include <asm/stack_pointer.h>
#include <asm/types.h>
typedef unsigned long mm_segment_t;
/*
* low level task data that entry.S needs immediate access to.
- * __switch_to() assumes cpu_context follows immediately after cpu_domain.
*/
struct thread_info {
unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */
- struct task_struct *task; /* main task structure */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ u64 ttbr0; /* saved TTBR0_EL1 */
+#endif
int preempt_count; /* 0 => preemptable, <0 => bug */
- int cpu; /* cpu */
};
#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
- .flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
}
-#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
-/*
- * how to get the current stack pointer from C
- */
-register unsigned long current_stack_pointer asm ("sp");
-
-/*
- * how to get the thread information struct from C
- */
-static inline struct thread_info *current_thread_info(void) __attribute_const__;
-
-/*
- * struct thread_info can be accessed directly via sp_el0.
- *
- * We don't use read_sysreg() as we want the compiler to cache the value where
- * possible.
- */
-static inline struct thread_info *current_thread_info(void)
-{
- unsigned long sp_el0;
-
- asm ("mrs %0, sp_el0" : "=r" (sp_el0));
-
- return (struct thread_info *)sp_el0;
-}
-
#define thread_saved_pc(tsk) \
((unsigned long)(tsk->thread.cpu_context.pc))
#define thread_saved_sp(tsk) \
@@ -112,6 +85,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
+#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -132,10 +106,12 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+ _TIF_UPROBE)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 55d0adbf6509..d26750ca6e06 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -18,6 +18,12 @@
#ifndef __ASM_UACCESS_H
#define __ASM_UACCESS_H
+#include <asm/alternative.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
/*
* User space memory access functions
*/
@@ -26,10 +32,8 @@
#include <linux/string.h>
#include <linux/thread_info.h>
-#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/ptrace.h>
-#include <asm/sysreg.h>
#include <asm/errno.h>
#include <asm/memory.h>
#include <asm/compiler.h>
@@ -120,6 +124,99 @@ static inline void set_fs(mm_segment_t fs)
" .popsection\n"
/*
+ * User access enabling/disabling.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void __uaccess_ttbr0_disable(void)
+{
+ unsigned long ttbr;
+
+ /* reserved_ttbr0 placed at the end of swapper_pg_dir */
+ ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
+ write_sysreg(ttbr, ttbr0_el1);
+ isb();
+}
+
+static inline void __uaccess_ttbr0_enable(void)
+{
+ unsigned long flags;
+
+ /*
+ * Disable interrupts to avoid preemption between reading the 'ttbr0'
+ * variable and the MSR. A context switch could trigger an ASID
+ * roll-over and an update of 'ttbr0'.
+ */
+ local_irq_save(flags);
+ write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
+ isb();
+ local_irq_restore(flags);
+}
+
+static inline bool uaccess_ttbr0_disable(void)
+{
+ if (!system_uses_ttbr0_pan())
+ return false;
+ __uaccess_ttbr0_disable();
+ return true;
+}
+
+static inline bool uaccess_ttbr0_enable(void)
+{
+ if (!system_uses_ttbr0_pan())
+ return false;
+ __uaccess_ttbr0_enable();
+ return true;
+}
+#else
+static inline bool uaccess_ttbr0_disable(void)
+{
+ return false;
+}
+
+static inline bool uaccess_ttbr0_enable(void)
+{
+ return false;
+}
+#endif
+
+#define __uaccess_disable(alt) \
+do { \
+ if (!uaccess_ttbr0_disable()) \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \
+ CONFIG_ARM64_PAN)); \
+} while (0)
+
+#define __uaccess_enable(alt) \
+do { \
+ if (!uaccess_ttbr0_enable()) \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
+ CONFIG_ARM64_PAN)); \
+} while (0)
+
+static inline void uaccess_disable(void)
+{
+ __uaccess_disable(ARM64_HAS_PAN);
+}
+
+static inline void uaccess_enable(void)
+{
+ __uaccess_enable(ARM64_HAS_PAN);
+}
+
+/*
+ * These functions are no-ops when UAO is present.
+ */
+static inline void uaccess_disable_not_uao(void)
+{
+ __uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
+}
+
+static inline void uaccess_enable_not_uao(void)
+{
+ __uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
+}
+
+/*
* The "__xxx" versions of the user access functions do not verify the address
* space - it must have been done previously with a separate "access_ok()"
* call.
@@ -146,8 +243,7 @@ static inline void set_fs(mm_segment_t fs)
do { \
unsigned long __gu_val; \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
+ uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
__get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
@@ -168,9 +264,8 @@ do { \
default: \
BUILD_BUG(); \
} \
+ uaccess_disable_not_uao(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
} while (0)
#define __get_user(x, ptr) \
@@ -215,8 +310,7 @@ do { \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
+ uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
__put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
@@ -237,8 +331,7 @@ do { \
default: \
BUILD_BUG(); \
} \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
+ uaccess_disable_not_uao(); \
} while (0)
#define __put_user(x, ptr) \
@@ -331,4 +424,66 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strlen_user(const char __user *str);
extern __must_check long strnlen_user(const char __user *str, long n);
+#else /* __ASSEMBLY__ */
+
+#include <asm/assembler.h>
+
+/*
+ * User access enabling/disabling macros.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ .macro __uaccess_ttbr0_disable, tmp1
+ mrs \tmp1, ttbr1_el1 // swapper_pg_dir
+ add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
+ msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
+ isb
+ .endm
+
+ .macro __uaccess_ttbr0_enable, tmp1
+ get_thread_info \tmp1
+ ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
+ msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
+ isb
+ .endm
+
+ .macro uaccess_ttbr0_disable, tmp1
+alternative_if_not ARM64_HAS_PAN
+ __uaccess_ttbr0_disable \tmp1
+alternative_else_nop_endif
+ .endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2
+alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp2 // avoid preemption
+ __uaccess_ttbr0_enable \tmp1
+ restore_irq \tmp2
+alternative_else_nop_endif
+ .endm
+#else
+ .macro uaccess_ttbr0_disable, tmp1
+ .endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .endm
+#endif
+
+/*
+ * These macros are no-ops when UAO is present.
+ */
+ .macro uaccess_disable_not_uao, tmp1
+ uaccess_ttbr0_disable \tmp1
+alternative_if ARM64_ALT_PAN_NOT_UAO
+ SET_PSTATE_PAN(1)
+alternative_else_nop_endif
+ .endm
+
+ .macro uaccess_enable_not_uao, tmp1, tmp2
+ uaccess_ttbr0_enable \tmp1, \tmp2
+alternative_if ARM64_ALT_PAN_NOT_UAO
+ SET_PSTATE_PAN(0)
+alternative_else_nop_endif
+ .endm
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
new file mode 100644
index 000000000000..8d004073d0e8
--- /dev/null
+++ b/arch/arm64/include/asm/uprobes.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2014-2016 Pratyush Anand <panand@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/probes.h>
+
+#define MAX_UINSN_BYTES AARCH64_INSN_SIZE
+
+#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES
+#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
+#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
+
+typedef u32 uprobe_opcode_t;
+
+struct arch_uprobe_task {
+};
+
+struct arch_uprobe {
+ union {
+ u8 insn[MAX_UINSN_BYTES];
+ u8 ixol[MAX_UINSN_BYTES];
+ };
+ struct arch_probe_insn api;
+ bool simulate;
+};
+
+#endif
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index b0988bb1bf64..04de188a36c9 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -14,10 +14,8 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
-#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
-#include <asm/opcodes.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
@@ -285,10 +283,10 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
#define __SWP_LL_SC_LOOPS 4
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
+do { \
+ uaccess_enable(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
- ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
"0: ldxr"B" %w2, [%4]\n" \
"1: stxr"B" %w0, %w1, [%4]\n" \
" cbz %w0, 2f\n" \
@@ -306,12 +304,12 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
" .popsection" \
_ASM_EXTABLE(0b, 4b) \
_ASM_EXTABLE(1b, 4b) \
- ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \
: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
- : "memory")
+ : "memory"); \
+ uaccess_disable(); \
+} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \
__user_swpX_asm(data, addr, res, temp, temp2, "")
@@ -352,6 +350,10 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
+#define ARM_OPCODE_CONDTEST_FAIL 0
+#define ARM_OPCODE_CONDTEST_PASS 1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
+
#define ARM_OPCODE_CONDITION_UNCOND 0xf
static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 4a2f0f0fef32..bc049afc73a7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -36,11 +36,13 @@ int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
BLANK();
- DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
- DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
- DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
- DEFINE(TI_TASK, offsetof(struct thread_info, task));
- DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
+ DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+ DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
BLANK();
@@ -123,6 +125,7 @@ int main(void)
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c02504ea304b..fdf8f045929f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -47,6 +47,7 @@ unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcaps);
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -746,6 +747,14 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
}
+static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
+
+ return cpuid_feature_extract_signed_field(pfr0,
+ ID_AA64PFR0_FP_SHIFT) < 0;
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -829,6 +838,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.def_scope = SCOPE_SYSTEM,
.matches = hyp_offset_low,
},
+ {
+ /* FP/SIMD is not implemented */
+ .capability = ARM64_HAS_NO_FPSIMD,
+ .def_scope = SCOPE_SYSTEM,
+ .min_field_value = 0,
+ .matches = has_no_fpsimd,
+ },
{},
};
@@ -1102,5 +1118,5 @@ void __init setup_cpu_features(void)
static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
- return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+ return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 73ae90ef434c..605df76f0a06 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -226,6 +226,8 @@ static void send_user_sigtrap(int si_code)
static int single_step_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
+ bool handler_found = false;
+
/*
* If we are stepping a pending breakpoint, call the hw_breakpoint
* handler first.
@@ -233,7 +235,14 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
if (!reinstall_suspended_bps(regs))
return 0;
- if (user_mode(regs)) {
+#ifdef CONFIG_KPROBES
+ if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
+#endif
+ if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
+
+ if (!handler_found && user_mode(regs)) {
send_user_sigtrap(TRAP_TRACE);
/*
@@ -243,15 +252,8 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
* to the active-not-pending state).
*/
user_rewind_single_step(current);
- } else {
-#ifdef CONFIG_KPROBES
- if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-#endif
- if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-
- pr_warning("Unexpected kernel single-step exception at EL1\n");
+ } else if (!handler_found) {
+ pr_warn("Unexpected kernel single-step exception at EL1\n");
/*
* Re-enable stepping since we know that we will be
* returning to regs.
@@ -304,16 +306,20 @@ NOKPROBE_SYMBOL(call_break_hook);
static int brk_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- if (user_mode(regs)) {
- send_user_sigtrap(TRAP_BRKPT);
- }
+ bool handler_found = false;
+
#ifdef CONFIG_KPROBES
- else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
- if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED)
- return -EFAULT;
+ if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
+ if (kprobe_breakpoint_handler(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
}
#endif
- else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
+ if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
+
+ if (!handler_found && user_mode(regs)) {
+ send_user_sigtrap(TRAP_BRKPT);
+ } else if (!handler_found) {
pr_warn("Unexpected kernel BRK exception at EL1\n");
return -EFAULT;
}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index ba9bee389fd5..5d17f377d905 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -62,8 +62,8 @@ struct screen_info screen_info __section(.data);
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pteval_t prot_val = create_mapping_protection(md);
- bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE &&
- md->type != EFI_RUNTIME_SERVICES_DATA);
+ bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
+ md->type == EFI_RUNTIME_SERVICES_DATA);
if (!PAGE_ALIGNED(md->phys_addr) ||
!PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
@@ -76,12 +76,12 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
* from the MMU routines. So avoid block mappings altogether in
* that case.
*/
- allow_block_mappings = false;
+ page_mappings_only = true;
}
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
- __pgprot(prot_val | PTE_NG), allow_block_mappings);
+ __pgprot(prot_val | PTE_NG), page_mappings_only);
return 0;
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 223d54a4d66b..4f0d76339414 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,7 +29,9 @@
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
+#include <asm/ptrace.h>
#include <asm/thread_info.h>
+#include <asm/uaccess.h>
#include <asm/unistd.h>
/*
@@ -90,9 +92,8 @@
.if \el == 0
mrs x21, sp_el0
- mov tsk, sp
- and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear,
- ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
+ ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
+ ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
mov x29, xzr // fp pointed to user-space
@@ -100,15 +101,41 @@
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
- ldr x20, [tsk, #TI_ADDR_LIMIT]
+ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #TASK_SIZE_64
- str x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
+ */
+alternative_if ARM64_HAS_PAN
+ b 1f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ mrs x21, ttbr0_el1
+ tst x21, #0xffff << 48 // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+ .endif
+
+ __uaccess_ttbr0_disable x21
+1:
+#endif
+
stp x22, x23, [sp, #S_PC]
/*
@@ -139,7 +166,7 @@
.if \el != 0
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
- str x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
@@ -147,6 +174,40 @@
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
+ .endif
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
+ */
+alternative_if ARM64_HAS_PAN
+ b 2f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ .endif
+
+ __uaccess_ttbr0_enable x0
+
+ .if \el == 0
+ /*
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
+ */
+ post_ttbr0_update_workaround
+ .endif
+1:
+ .if \el != 0
+ and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ .endif
+2:
+#endif
+
+ .if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
#ifdef CONFIG_ARM64_ERRATUM_845719
@@ -162,6 +223,7 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
.endif
+
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
@@ -184,23 +246,20 @@ alternative_else_nop_endif
eret // return to kernel
.endm
- .macro get_thread_info, rd
- mrs \rd, sp_el0
- .endm
-
.macro irq_stack_entry
mov x19, sp // preserve the original sp
/*
- * Compare sp with the current thread_info, if the top
- * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
- * should switch to the irq stack.
+ * Compare sp with the base of the task stack.
+ * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+ * and should switch to the irq stack.
*/
- and x25, x19, #~(THREAD_SIZE - 1)
- cmp x25, tsk
- b.ne 9998f
+ ldr x25, [tsk, TSK_STACK]
+ eor x25, x25, x19
+ and x25, x25, #~(THREAD_SIZE - 1)
+ cbnz x25, 9998f
- this_cpu_ptr irq_stack, x25, x26
+ adr_this_cpu x25, irq_stack, x26
mov x26, #IRQ_STACK_START_SP
add x26, x25, x26
@@ -427,9 +486,9 @@ el1_irq:
irq_handler
#ifdef CONFIG_PREEMPT
- ldr w24, [tsk, #TI_PREEMPT] // get preempt count
+ ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
cbnz w24, 1f // preempt count != 0
- ldr x0, [tsk, #TI_FLAGS] // get flags
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
bl el1_preempt
1:
@@ -444,7 +503,7 @@ ENDPROC(el1_irq)
el1_preempt:
mov x24, lr
1: bl preempt_schedule_irq // irq en/disable is done inside
- ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24
#endif
@@ -674,8 +733,7 @@ ENTRY(cpu_switch_to)
ldp x29, x9, [x8], #16
ldr lr, [x8]
mov sp, x9
- and x9, x9, #~(THREAD_SIZE - 1)
- msr sp_el0, x9
+ msr sp_el0, x1
ret
ENDPROC(cpu_switch_to)
@@ -686,7 +744,7 @@ ENDPROC(cpu_switch_to)
ret_fast_syscall:
disable_irq // disable interrupts
str x0, [sp, #S_X0] // returned x0
- ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
and x2, x1, #_TIF_SYSCALL_WORK
cbnz x2, ret_fast_syscall_trace
and x2, x1, #_TIF_WORK_MASK
@@ -706,14 +764,14 @@ work_pending:
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on // enabled while in userspace
#endif
- ldr x1, [tsk, #TI_FLAGS] // re-check for single-step
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
b finish_ret_to_user
/*
* "slow" syscall return path.
*/
ret_to_user:
disable_irq // disable interrupts
- ldr x1, [tsk, #TI_FLAGS]
+ ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
finish_ret_to_user:
@@ -746,7 +804,7 @@ el0_svc_naked: // compat entry point
enable_dbg_and_irq
ct_user_exit 1
- ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
tst x16, #_TIF_SYSCALL_WORK
b.ne __sys_trace
cmp scno, sc_nr // check upper syscall limit
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 394c61db5566..b883f1f75216 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -127,6 +127,8 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next)
{
+ if (!system_supports_fpsimd())
+ return;
/*
* Save the current FPSIMD state to memory, but only if whatever is in
* the registers is in fact the most recent userland FPSIMD state of
@@ -157,6 +159,8 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
+ if (!system_supports_fpsimd())
+ return;
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE);
@@ -168,6 +172,8 @@ void fpsimd_flush_thread(void)
*/
void fpsimd_preserve_current_state(void)
{
+ if (!system_supports_fpsimd())
+ return;
preempt_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
@@ -181,6 +187,8 @@ void fpsimd_preserve_current_state(void)
*/
void fpsimd_restore_current_state(void)
{
+ if (!system_supports_fpsimd())
+ return;
preempt_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
@@ -199,6 +207,8 @@ void fpsimd_restore_current_state(void)
*/
void fpsimd_update_current_state(struct fpsimd_state *state)
{
+ if (!system_supports_fpsimd())
+ return;
preempt_disable();
fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -228,6 +238,8 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
*/
void kernel_neon_begin_partial(u32 num_regs)
{
+ if (WARN_ON(!system_supports_fpsimd()))
+ return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
@@ -252,6 +264,8 @@ EXPORT_SYMBOL(kernel_neon_begin_partial);
void kernel_neon_end(void)
{
+ if (!system_supports_fpsimd())
+ return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 332e33193ccf..4b1abac3485a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -326,14 +326,14 @@ __create_page_tables:
* dirty cache lines being evicted.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
*/
adrp x0, idmap_pg_dir
- adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -412,7 +412,7 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
dmb sy
bl __inval_cache_range
@@ -428,7 +428,8 @@ ENDPROC(__create_page_tables)
__primary_switched:
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
- msr sp_el0, x4 // Save thread_info
+ adr_l x5, init_task
+ msr sp_el0, x5 // Save thread_info
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
@@ -524,10 +525,21 @@ set_hcr:
msr hcr_el2, x0
isb
- /* Generic timers. */
+ /*
+ * Allow Non-secure EL1 and EL0 to access physical timer and counter.
+ * This is not necessary for VHE, since the host kernel runs in EL2,
+ * and EL0 accesses are configured in the later stage of boot process.
+ * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
+ * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
+ * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
+ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
+ * EL2.
+ */
+ cbnz x2, 1f
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
+1:
msr cntvoff_el2, xzr // Clear virtual offset
#ifdef CONFIG_ARM_GIC_V3
@@ -699,10 +711,10 @@ __secondary_switched:
isb
adr_l x0, secondary_data
- ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
- mov sp, x0
- and x0, x0, #~(THREAD_SIZE - 1)
- msr sp_el0, x0 // save thread_info
+ ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
+ mov sp, x1
+ ldr x2, [x0, #CPU_BOOT_TASK]
+ msr sp_el0, x2
mov x29, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 948b73148d56..1b3c747fedda 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -317,9 +317,21 @@ static int get_hbp_len(u8 hbp_len)
case ARM_BREAKPOINT_LEN_2:
len_in_bytes = 2;
break;
+ case ARM_BREAKPOINT_LEN_3:
+ len_in_bytes = 3;
+ break;
case ARM_BREAKPOINT_LEN_4:
len_in_bytes = 4;
break;
+ case ARM_BREAKPOINT_LEN_5:
+ len_in_bytes = 5;
+ break;
+ case ARM_BREAKPOINT_LEN_6:
+ len_in_bytes = 6;
+ break;
+ case ARM_BREAKPOINT_LEN_7:
+ len_in_bytes = 7;
+ break;
case ARM_BREAKPOINT_LEN_8:
len_in_bytes = 8;
break;
@@ -349,7 +361,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
* to generic breakpoint descriptions.
*/
int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type)
+ int *gen_len, int *gen_type, int *offset)
{
/* Type */
switch (ctrl.type) {
@@ -369,17 +381,33 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
return -EINVAL;
}
+ if (!ctrl.len)
+ return -EINVAL;
+ *offset = __ffs(ctrl.len);
+
/* Len */
- switch (ctrl.len) {
+ switch (ctrl.len >> *offset) {
case ARM_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
case ARM_BREAKPOINT_LEN_2:
*gen_len = HW_BREAKPOINT_LEN_2;
break;
+ case ARM_BREAKPOINT_LEN_3:
+ *gen_len = HW_BREAKPOINT_LEN_3;
+ break;
case ARM_BREAKPOINT_LEN_4:
*gen_len = HW_BREAKPOINT_LEN_4;
break;
+ case ARM_BREAKPOINT_LEN_5:
+ *gen_len = HW_BREAKPOINT_LEN_5;
+ break;
+ case ARM_BREAKPOINT_LEN_6:
+ *gen_len = HW_BREAKPOINT_LEN_6;
+ break;
+ case ARM_BREAKPOINT_LEN_7:
+ *gen_len = HW_BREAKPOINT_LEN_7;
+ break;
case ARM_BREAKPOINT_LEN_8:
*gen_len = HW_BREAKPOINT_LEN_8;
break;
@@ -423,9 +451,21 @@ static int arch_build_bp_info(struct perf_event *bp)
case HW_BREAKPOINT_LEN_2:
info->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
+ case HW_BREAKPOINT_LEN_3:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_3;
+ break;
case HW_BREAKPOINT_LEN_4:
info->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
+ case HW_BREAKPOINT_LEN_5:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_5;
+ break;
+ case HW_BREAKPOINT_LEN_6:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_6;
+ break;
+ case HW_BREAKPOINT_LEN_7:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_7;
+ break;
case HW_BREAKPOINT_LEN_8:
info->ctrl.len = ARM_BREAKPOINT_LEN_8;
break;
@@ -517,18 +557,17 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
default:
return -EINVAL;
}
-
- info->address &= ~alignment_mask;
- info->ctrl.len <<= offset;
} else {
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE)
alignment_mask = 0x3;
else
alignment_mask = 0x7;
- if (info->address & alignment_mask)
- return -EINVAL;
+ offset = info->address & alignment_mask;
}
+ info->address &= ~alignment_mask;
+ info->ctrl.len <<= offset;
+
/*
* Disallow per-task kernel breakpoints since these would
* complicate the stepping code.
@@ -661,12 +700,47 @@ unlock:
}
NOKPROBE_SYMBOL(breakpoint_handler);
+/*
+ * Arm64 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See Section D2.10.5 "Determining the memory location that caused a Watchpoint
+ * exception" of ARMv8 Architecture Reference Manual for details.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
+ struct arch_hw_breakpoint_ctrl *ctrl)
+{
+ u64 wp_low, wp_high;
+ u32 lens, lene;
+
+ lens = __ffs(ctrl->len);
+ lene = __fls(ctrl->len);
+
+ wp_low = val + lens;
+ wp_high = val + lene;
+ if (addr < wp_low)
+ return wp_low - addr;
+ else if (addr > wp_high)
+ return addr - wp_high;
+ else
+ return 0;
+}
+
static int watchpoint_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- int i, step = 0, *kernel_step, access;
+ int i, step = 0, *kernel_step, access, closest_match = 0;
+ u64 min_dist = -1, dist;
u32 ctrl_reg;
- u64 val, alignment_mask;
+ u64 val;
struct perf_event *wp, **slots;
struct debug_info *debug_info;
struct arch_hw_breakpoint *info;
@@ -675,35 +749,15 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
slots = this_cpu_ptr(wp_on_reg);
debug_info = &current->thread.debug;
+ /*
+ * Find all watchpoints that match the reported address. If no exact
+ * match is found. Attribute the hit to the closest watchpoint.
+ */
+ rcu_read_lock();
for (i = 0; i < core_num_wrps; ++i) {
- rcu_read_lock();
-
wp = slots[i];
-
if (wp == NULL)
- goto unlock;
-
- info = counter_arch_bp(wp);
- /* AArch32 watchpoints are either 4 or 8 bytes aligned. */
- if (is_compat_task()) {
- if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
- alignment_mask = 0x7;
- else
- alignment_mask = 0x3;
- } else {
- alignment_mask = 0x7;
- }
-
- /* Check if the watchpoint value matches. */
- val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
- if (val != (addr & ~alignment_mask))
- goto unlock;
-
- /* Possible match, check the byte address select to confirm. */
- ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
- decode_ctrl_reg(ctrl_reg, &ctrl);
- if (!((1 << (addr & alignment_mask)) & ctrl.len))
- goto unlock;
+ continue;
/*
* Check that the access type matches.
@@ -712,18 +766,41 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
- goto unlock;
+ continue;
+ /* Check if the watchpoint value and byte select match. */
+ val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+ ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+ decode_ctrl_reg(ctrl_reg, &ctrl);
+ dist = get_distance_from_watchpoint(addr, val, &ctrl);
+ if (dist < min_dist) {
+ min_dist = dist;
+ closest_match = i;
+ }
+ /* Is this an exact match? */
+ if (dist != 0)
+ continue;
+
+ info = counter_arch_bp(wp);
info->trigger = addr;
perf_bp_event(wp, regs);
/* Do we need to handle the stepping? */
if (is_default_overflow_handler(wp))
step = 1;
+ }
+ if (min_dist > 0 && min_dist != -1) {
+ /* No exact match found. */
+ wp = slots[closest_match];
+ info = counter_arch_bp(wp);
+ info->trigger = addr;
+ perf_bp_event(wp, regs);
-unlock:
- rcu_read_unlock();
+ /* Do we need to handle the stepping? */
+ if (is_default_overflow_handler(wp))
+ step = 1;
}
+ rcu_read_unlock();
if (!step)
return 0;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6f2ac4fc66ca..94b62c1fa4df 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -30,7 +30,6 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
-#include <asm/opcodes.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index e017a9493b92..d217c9e95b06 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -247,6 +247,9 @@ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
{
+ if (!kgdb_single_step)
+ return DBG_HOOK_ERROR;
+
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return 0;
}
diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
index ce06312e3d34..89b6df613dde 100644
--- a/arch/arm64/kernel/probes/Makefile
+++ b/arch/arm64/kernel/probes/Makefile
@@ -1,3 +1,5 @@
obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \
kprobes_trampoline.o \
simulate-insn.o
+obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o \
+ simulate-insn.o
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index d1731bf977ef..6bf6657a5a52 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -17,7 +17,6 @@
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
-#include <asm/kprobes.h>
#include <asm/insn.h>
#include <asm/sections.h>
@@ -78,8 +77,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD If instruction is supported and uses instruction slot,
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
-static enum kprobe_insn __kprobes
-arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
{
/*
* Instructions reading or modifying the PC won't work from the XOL
@@ -89,26 +88,26 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD;
if (aarch64_insn_is_bcond(insn)) {
- asi->handler = simulate_b_cond;
+ api->handler = simulate_b_cond;
} else if (aarch64_insn_is_cbz(insn) ||
aarch64_insn_is_cbnz(insn)) {
- asi->handler = simulate_cbz_cbnz;
+ api->handler = simulate_cbz_cbnz;
} else if (aarch64_insn_is_tbz(insn) ||
aarch64_insn_is_tbnz(insn)) {
- asi->handler = simulate_tbz_tbnz;
+ api->handler = simulate_tbz_tbnz;
} else if (aarch64_insn_is_adr_adrp(insn)) {
- asi->handler = simulate_adr_adrp;
+ api->handler = simulate_adr_adrp;
} else if (aarch64_insn_is_b(insn) ||
aarch64_insn_is_bl(insn)) {
- asi->handler = simulate_b_bl;
+ api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
aarch64_insn_is_blr(insn) ||
aarch64_insn_is_ret(insn)) {
- asi->handler = simulate_br_blr_ret;
+ api->handler = simulate_br_blr_ret;
} else if (aarch64_insn_is_ldr_lit(insn)) {
- asi->handler = simulate_ldr_literal;
+ api->handler = simulate_ldr_literal;
} else if (aarch64_insn_is_ldrsw_lit(insn)) {
- asi->handler = simulate_ldrsw_literal;
+ api->handler = simulate_ldrsw_literal;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -120,6 +119,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD_NO_SLOT;
}
+#ifdef CONFIG_KPROBES
static bool __kprobes
is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
{
@@ -138,12 +138,12 @@ is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
return false;
}
-enum kprobe_insn __kprobes
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
- enum kprobe_insn decoded;
- kprobe_opcode_t insn = le32_to_cpu(*addr);
- kprobe_opcode_t *scan_end = NULL;
+ enum probe_insn decoded;
+ probe_opcode_t insn = le32_to_cpu(*addr);
+ probe_opcode_t *scan_end = NULL;
unsigned long size = 0, offset = 0;
/*
@@ -162,7 +162,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
else
scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
- decoded = arm_probe_decode_insn(insn, asi);
+ decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded != INSN_REJECTED && scan_end)
if (is_probed_address_atomic(addr - 1, scan_end))
@@ -170,3 +170,4 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
return decoded;
}
+#endif
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index d438289646a6..76d3f315407f 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -23,13 +23,17 @@
*/
#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t))
-enum kprobe_insn {
+enum probe_insn {
INSN_REJECTED,
INSN_GOOD_NO_SLOT,
INSN_GOOD,
};
-enum kprobe_insn __kprobes
+#ifdef CONFIG_KPROBES
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
+#endif
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index f5077ea7af6d..1decd2b2c730 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -44,31 +44,31 @@ post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
- p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+ p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
- flush_icache_range((uintptr_t) (p->ainsn.insn),
- (uintptr_t) (p->ainsn.insn) +
+ flush_icache_range((uintptr_t) (p->ainsn.api.insn),
+ (uintptr_t) (p->ainsn.api.insn) +
MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
/*
* Needs restoring of return address after stepping xol.
*/
- p->ainsn.restore = (unsigned long) p->addr +
+ p->ainsn.api.restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
/* This instructions is not executed xol. No need to adjust the PC */
- p->ainsn.restore = 0;
+ p->ainsn.api.restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (p->ainsn.handler)
- p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+ if (p->ainsn.api.handler)
+ p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(kcb, regs);
@@ -98,18 +98,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.insn = NULL;
+ p->ainsn.api.insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn)
+ p->ainsn.api.insn = get_insn_slot();
+ if (!p->ainsn.api.insn)
return -ENOMEM;
break;
};
/* prepare the instruction */
- if (p->ainsn.insn)
+ if (p->ainsn.api.insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -142,9 +142,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.insn) {
- free_insn_slot(p->ainsn.insn, 0);
- p->ainsn.insn = NULL;
+ if (p->ainsn.api.insn) {
+ free_insn_slot(p->ainsn.api.insn, 0);
+ p->ainsn.api.insn = NULL;
}
}
@@ -244,9 +244,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.insn) {
+ if (p->ainsn.api.insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.insn;
+ slot = (unsigned long)p->ainsn.api.insn;
set_ss_context(kcb, slot); /* mark pending ss */
@@ -295,8 +295,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
return;
/* return addr restore if non-branching insn */
- if (cur->ainsn.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.restore);
+ if (cur->ainsn.api.restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.api.restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 8977ce9d009d..357d3efe1366 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,28 +13,26 @@
* General Public License for more details.
*/
+#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include "simulate-insn.h"
-#define sign_extend(x, signbit) \
- ((x) | (0 - ((x) & (1 << (signbit)))))
-
#define bbl_displacement(insn) \
- sign_extend(((insn) & 0x3ffffff) << 2, 27)
+ sign_extend32(((insn) & 0x3ffffff) << 2, 27)
#define bcond_displacement(insn) \
- sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+ sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20)
#define cbz_displacement(insn) \
- sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+ sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20)
#define tbz_displacement(insn) \
- sign_extend(((insn >> 5) & 0x3fff) << 2, 15)
+ sign_extend32(((insn >> 5) & 0x3fff) << 2, 15)
#define ldr_displacement(insn) \
- sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+ sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20)
static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val)
{
@@ -106,7 +104,7 @@ simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs)
xn = opcode & 0x1f;
imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3);
- imm = sign_extend(imm, 20);
+ imm = sign_extend64(imm, 20);
if (opcode & 0x80000000)
val = (imm<<12) + (addr & 0xfffffffffffff000);
else
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
new file mode 100644
index 000000000000..26c998534dca
--- /dev/null
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2014-2016 Pratyush Anand <panand@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+#include <asm/cacheflush.h>
+
+#include "decode-insn.h"
+
+#define UPROBE_INV_FAULT_CODE UINT_MAX
+
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ void *src, unsigned long len)
+{
+ void *xol_page_kaddr = kmap_atomic(page);
+ void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+
+ /* Initialize the slot */
+ memcpy(dst, src, len);
+
+ /* flush caches (dcache/icache) */
+ sync_icache_aliases(dst, len);
+
+ kunmap_atomic(xol_page_kaddr);
+}
+
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+ return instruction_pointer(regs);
+}
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ probe_opcode_t insn;
+
+ /* TODO: Currently we do not support AARCH32 instruction probing */
+ if (test_bit(TIF_32BIT, &mm->context.flags))
+ return -ENOTSUPP;
+ else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
+ return -EINVAL;
+
+ insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+
+ switch (arm_probe_decode_insn(insn, &auprobe->api)) {
+ case INSN_REJECTED:
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT:
+ auprobe->simulate = true;
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ /* Initialize with an invalid fault code to detect if ol insn trapped */
+ current->thread.fault_code = UPROBE_INV_FAULT_CODE;
+
+ /* Instruction points to execute ol */
+ instruction_pointer_set(regs, utask->xol_vaddr);
+
+ user_enable_single_step(current);
+
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ WARN_ON_ONCE(current->thread.fault_code != UPROBE_INV_FAULT_CODE);
+
+ /* Instruction points to execute next to breakpoint address */
+ instruction_pointer_set(regs, utask->vaddr + 4);
+
+ user_disable_single_step(current);
+
+ return 0;
+}
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+ /*
+ * Between arch_uprobe_pre_xol and arch_uprobe_post_xol, if an xol
+ * insn itself is trapped, then detect the case with the help of
+ * invalid fault code which is being set in arch_uprobe_pre_xol
+ */
+ if (t->thread.fault_code != UPROBE_INV_FAULT_CODE)
+ return true;
+
+ return false;
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ probe_opcode_t insn;
+ unsigned long addr;
+
+ if (!auprobe->simulate)
+ return false;
+
+ insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ addr = instruction_pointer(regs);
+
+ if (auprobe->api.handler)
+ auprobe->api.handler(insn, addr, regs);
+
+ return true;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ /*
+ * Task has received a fatal signal, so reset back to probbed
+ * address.
+ */
+ instruction_pointer_set(regs, utask->vaddr);
+
+ user_disable_single_step(current);
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ /*
+ * If a simple branch instruction (B) was called for retprobed
+ * assembly label then return true even when regs->sp and ret->stack
+ * are same. It will ensure that cleanup and reporting of return
+ * instances corresponding to callee label is done when
+ * handle_trampoline for called function is executed.
+ */
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->sp <= ret->stack;
+ else
+ return regs->sp < ret->stack;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+ struct pt_regs *regs)
+{
+ unsigned long orig_ret_vaddr;
+
+ orig_ret_vaddr = procedure_link_pointer(regs);
+ /* Replace the return addr with trampoline addr */
+ procedure_link_pointer_set(regs, trampoline_vaddr);
+
+ return orig_ret_vaddr;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return NOTIFY_DONE;
+}
+
+static int uprobe_breakpoint_handler(struct pt_regs *regs,
+ unsigned int esr)
+{
+ if (user_mode(regs) && uprobe_pre_sstep_notifier(regs))
+ return DBG_HOOK_HANDLED;
+
+ return DBG_HOOK_ERROR;
+}
+
+static int uprobe_single_step_handler(struct pt_regs *regs,
+ unsigned int esr)
+{
+ struct uprobe_task *utask = current->utask;
+
+ if (user_mode(regs)) {
+ WARN_ON(utask &&
+ (instruction_pointer(regs) != utask->xol_vaddr + 4));
+
+ if (uprobe_post_sstep_notifier(regs))
+ return DBG_HOOK_HANDLED;
+ }
+
+ return DBG_HOOK_ERROR;
+}
+
+/* uprobe breakpoint handler hook */
+static struct break_hook uprobes_break_hook = {
+ .esr_mask = BRK64_ESR_MASK,
+ .esr_val = BRK64_ESR_UPROBES,
+ .fn = uprobe_breakpoint_handler,
+};
+
+/* uprobe single step handler hook */
+static struct step_hook uprobes_step_hook = {
+ .fn = uprobe_single_step_handler,
+};
+
+static int __init arch_init_uprobes(void)
+{
+ register_break_hook(&uprobes_break_hook);
+ register_step_hook(&uprobes_step_hook);
+
+ return 0;
+}
+
+device_initcall(arch_init_uprobes);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 01753cd7d3f0..a3a2816ba73a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -45,6 +45,7 @@
#include <linux/personality.h>
#include <linux/notifier.h>
#include <trace/events/power.h>
+#include <linux/percpu.h>
#include <asm/alternative.h>
#include <asm/compat.h>
@@ -282,7 +283,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h;
if (IS_ENABLED(CONFIG_ARM64_UAO) &&
- cpus_have_cap(ARM64_HAS_UAO))
+ cpus_have_const_cap(ARM64_HAS_UAO))
childregs->pstate |= PSR_UAO_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
@@ -322,6 +323,20 @@ void uao_thread_switch(struct task_struct *next)
}
/*
+ * We store our current task in sp_el0, which is clobbered by userspace. Keep a
+ * shadow copy so that we can restore this upon entry from userspace.
+ *
+ * This is *only* for exception entry from EL0, and is not valid until we
+ * __switch_to() a user task.
+ */
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+
+static void entry_task_switch(struct task_struct *next)
+{
+ __this_cpu_write(__entry_task, next);
+}
+
+/*
* Thread switching.
*/
struct task_struct *__switch_to(struct task_struct *prev,
@@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
+ entry_task_switch(next);
uao_thread_switch(next);
/*
@@ -350,27 +366,35 @@ struct task_struct *__switch_to(struct task_struct *prev,
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
- unsigned long stack_page;
+ unsigned long stack_page, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
+ stack_page = (unsigned long)try_get_task_stack(p);
+ if (!stack_page)
+ return 0;
+
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = p->curr_ret_stack;
#endif
- stack_page = (unsigned long)task_stack_page(p);
do {
if (frame.sp < stack_page ||
frame.sp >= stack_page + THREAD_SIZE ||
unwind_frame(p, &frame))
- return 0;
- if (!in_sched_functions(frame.pc))
- return frame.pc;
+ goto out;
+ if (!in_sched_functions(frame.pc)) {
+ ret = frame.pc;
+ goto out;
+ }
} while (count ++ < 16);
- return 0;
+
+out:
+ put_task_stack(p);
+ return ret;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index e0c81da60f76..fc35e06ccaac 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -327,13 +327,13 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
struct arch_hw_breakpoint_ctrl ctrl,
struct perf_event_attr *attr)
{
- int err, len, type, disabled = !ctrl.enabled;
+ int err, len, type, offset, disabled = !ctrl.enabled;
attr->disabled = disabled;
if (disabled)
return 0;
- err = arch_bp_generic_fields(ctrl, &len, &type);
+ err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
if (err)
return err;
@@ -352,6 +352,7 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
attr->bp_len = len;
attr->bp_type = type;
+ attr->bp_addr += offset;
return 0;
}
@@ -404,7 +405,7 @@ static int ptrace_hbp_get_addr(unsigned int note_type,
if (IS_ERR(bp))
return PTR_ERR(bp);
- *addr = bp ? bp->attr.bp_addr : 0;
+ *addr = bp ? counter_arch_bp(bp)->address : 0;
return 0;
}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 1718706fde83..12a87f2600f2 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/ftrace.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
struct return_address_data {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f534f492a268..a53f52ac81c6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -291,6 +291,15 @@ void __init setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Make sure init_thread_info.ttbr0 always generates translation
+ * faults in case uaccess_enable() is inadvertently called by the init
+ * thread.
+ */
+ init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+#endif
+
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 404dd67080b9..c7b6de62f9d3 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -414,6 +414,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
} else {
local_irq_enable();
+ if (thread_flags & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
if (thread_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 1bec41b5fda3..df67652e46f0 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -125,9 +125,6 @@ ENTRY(_cpu_resume)
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
mov sp, x2
- /* save thread_info */
- and x2, x2, #~(THREAD_SIZE - 1)
- msr sp_el0, x2
/*
* cpu_do_resume expects x0 to contain context address pointer
*/
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 8507703dabe4..cb87234cfcf2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -58,6 +58,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -146,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
+ secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -170,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
+ secondary_data.task = NULL;
secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status);
if (ret && status) {
@@ -208,7 +213,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
asmlinkage void secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu;
+
+ cpu = task_cpu(current);
+ set_my_cpu_offset(per_cpu_offset(cpu));
/*
* All kernel threads share the same mm context; grab a
@@ -217,8 +225,6 @@ asmlinkage void secondary_start_kernel(void)
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -718,6 +724,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
+ per_cpu(cpu_number, cpu) = cpu;
+
if (cpu == smp_processor_id())
continue;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c2efddfca18c..8a552a33c6ef 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -22,6 +22,7 @@
#include <linux/stacktrace.h>
#include <asm/irq.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
@@ -128,7 +129,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
break;
}
}
-EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
@@ -181,6 +181,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
struct stack_trace_data data;
struct stackframe frame;
+ if (!try_get_task_stack(tsk))
+ return;
+
data.trace = trace;
data.skip = trace->skip;
@@ -202,6 +205,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
walk_stackframe(tsk, &frame, save_trace, &data);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
+
+ put_task_stack(tsk);
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index bb0cd787a9d3..1e3be9064cfa 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -47,12 +47,6 @@ void notrace __cpu_suspend_exit(void)
cpu_uninstall_idmap();
/*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
- */
- set_my_cpu_offset(per_cpu_offset(cpu));
-
- /*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 694f6deedbab..23e9e13bd2aa 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,10 +19,226 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/cpufreq.h>
+#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_MUTEX(cpu_scale_mutex);
+
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+ per_cpu(cpu_scale, cpu) = capacity;
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+static ssize_t cpu_capacity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sprintf(buf, "%lu\n",
+ arch_scale_cpu_capacity(NULL, cpu->dev.id));
+}
+
+static ssize_t cpu_capacity_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ ret = kstrtoul(buf, 0, &new_capacity);
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(cpu_capacity);
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+subsys_initcall(register_cpu_capacity_sysctl);
+#endif
+
+static u32 capacity_scale;
+static u32 *raw_capacity;
+static bool cap_parsing_failed;
+
+static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
+{
+ int ret;
+ u32 cpu_capacity;
+
+ if (cap_parsing_failed)
+ return;
+
+ ret = of_property_read_u32(cpu_node,
+ "capacity-dmips-mhz",
+ &cpu_capacity);
+ if (!ret) {
+ if (!raw_capacity) {
+ raw_capacity = kcalloc(num_possible_cpus(),
+ sizeof(*raw_capacity),
+ GFP_KERNEL);
+ if (!raw_capacity) {
+ pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
+ cap_parsing_failed = true;
+ return;
+ }
+ }
+ capacity_scale = max(cpu_capacity, capacity_scale);
+ raw_capacity[cpu] = cpu_capacity;
+ pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
+ cpu_node->full_name, raw_capacity[cpu]);
+ } else {
+ if (raw_capacity) {
+ pr_err("cpu_capacity: missing %s raw capacity\n",
+ cpu_node->full_name);
+ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+ }
+ cap_parsing_failed = true;
+ kfree(raw_capacity);
+ }
+}
+
+static void normalize_cpu_capacity(void)
+{
+ u64 capacity;
+ int cpu;
+
+ if (!raw_capacity || cap_parsing_failed)
+ return;
+
+ pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ mutex_lock(&cpu_scale_mutex);
+ for_each_possible_cpu(cpu) {
+ pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
+ cpu, raw_capacity[cpu]);
+ capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
+ / capacity_scale;
+ set_capacity_scale(cpu, capacity);
+ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
+ cpu, arch_scale_cpu_capacity(NULL, cpu));
+ }
+ mutex_unlock(&cpu_scale_mutex);
+}
+
+#ifdef CONFIG_CPU_FREQ
+static cpumask_var_t cpus_to_visit;
+static bool cap_parsing_done;
+static void parsing_done_workfn(struct work_struct *work);
+static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
+
+static int
+init_cpu_capacity_callback(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (cap_parsing_failed || cap_parsing_done)
+ return 0;
+
+ switch (val) {
+ case CPUFREQ_NOTIFY:
+ pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
+ cpumask_pr_args(policy->related_cpus),
+ cpumask_pr_args(cpus_to_visit));
+ cpumask_andnot(cpus_to_visit,
+ cpus_to_visit,
+ policy->related_cpus);
+ for_each_cpu(cpu, policy->related_cpus) {
+ raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
+ policy->cpuinfo.max_freq / 1000UL;
+ capacity_scale = max(raw_capacity[cpu], capacity_scale);
+ }
+ if (cpumask_empty(cpus_to_visit)) {
+ normalize_cpu_capacity();
+ kfree(raw_capacity);
+ pr_debug("cpu_capacity: parsing done\n");
+ cap_parsing_done = true;
+ schedule_work(&parsing_done_work);
+ }
+ }
+ return 0;
+}
+
+static struct notifier_block init_cpu_capacity_notifier = {
+ .notifier_call = init_cpu_capacity_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+ if (cap_parsing_failed)
+ return -EINVAL;
+
+ if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
+ pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
+ return -ENOMEM;
+ }
+ cpumask_copy(cpus_to_visit, cpu_possible_mask);
+
+ return cpufreq_register_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+static void parsing_done_workfn(struct work_struct *work)
+{
+ cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+
+#else
+static int __init free_raw_capacity(void)
+{
+ kfree(raw_capacity);
+
+ return 0;
+}
+core_initcall(free_raw_capacity);
+#endif
+
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
@@ -34,6 +250,7 @@ static int __init get_cpu_for_node(struct device_node *node)
for_each_possible_cpu(cpu) {
if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ parse_cpu_capacity(cpu_node, cpu);
of_node_put(cpu_node);
return cpu;
}
@@ -178,13 +395,17 @@ static int __init parse_dt_topology(void)
* cluster with restricted subnodes.
*/
map = of_get_child_by_name(cn, "cpu-map");
- if (!map)
+ if (!map) {
+ cap_parsing_failed = true;
goto out;
+ }
ret = parse_cluster(map, 0);
if (ret != 0)
goto out_map;
+ normalize_cpu_capacity();
+
/*
* Check that all cores are in the topology; the SMP code will
* only mark cores described in the DT as possible.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index c9986b3e0a96..5b830be79c01 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/traps.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
#include <asm/system_misc.h>
@@ -147,6 +148,9 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
if (!tsk)
tsk = current;
+ if (!try_get_task_stack(tsk))
+ return;
+
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
@@ -212,6 +216,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
stack + sizeof(struct pt_regs));
}
}
+
+ put_task_stack(tsk);
}
void show_stack(struct task_struct *tsk, unsigned long *sp)
@@ -227,10 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
#endif
#define S_SMP " SMP"
-static int __die(const char *str, int err, struct thread_info *thread,
- struct pt_regs *regs)
+static int __die(const char *str, int err, struct pt_regs *regs)
{
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
static int die_counter;
int ret;
@@ -245,7 +250,8 @@ static int __die(const char *str, int err, struct thread_info *thread,
print_modules();
__show_regs(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk),
+ end_of_stack(tsk));
if (!user_mode(regs)) {
dump_mem(KERN_EMERG, "Stack: ", regs->sp,
@@ -264,7 +270,6 @@ static DEFINE_RAW_SPINLOCK(die_lock);
*/
void die(const char *str, struct pt_regs *regs, int err)
{
- struct thread_info *thread = current_thread_info();
int ret;
oops_enter();
@@ -272,9 +277,9 @@ void die(const char *str, struct pt_regs *regs, int err)
raw_spin_lock_irq(&die_lock);
console_verbose();
bust_spinlocks(1);
- ret = __die(str, err, thread, regs);
+ ret = __die(str, err, regs);
- if (regs && kexec_should_crash(thread->task))
+ if (regs && kexec_should_crash(current))
crash_kexec(regs);
bust_spinlocks(0);
@@ -435,9 +440,10 @@ int cpu_enable_cache_maint_trap(void *__unused)
}
#define __user_cache_maint(insn, address, res) \
- if (untagged_addr(address) >= user_addr_max()) \
+ if (untagged_addr(address) >= user_addr_max()) { \
res = -EFAULT; \
- else \
+ } else { \
+ uaccess_ttbr0_enable(); \
asm volatile ( \
"1: " insn ", %1\n" \
" mov %w0, #0\n" \
@@ -449,7 +455,9 @@ int cpu_enable_cache_maint_trap(void *__unused)
" .popsection\n" \
_ASM_EXTABLE(1b, 3b) \
: "=r" (res) \
- : "r" (address), "i" (-EFAULT) )
+ : "r" (address), "i" (-EFAULT)); \
+ uaccess_ttbr0_disable(); \
+ }
static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
{
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1105aab1e6d6..b8deffa9e1bf 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -216,6 +216,11 @@ SECTIONS
swapper_pg_dir = .;
. += SWAPPER_DIR_SIZE;
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ reserved_ttbr0 = .;
+ . += RESERVED_TTBR0_SIZE;
+#endif
+
_end = .;
STABS_DEBUG
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a204adf29f0a..1bfe30dfbfe7 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -57,6 +57,16 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+/*
+ * Guest access to FP/ASIMD registers are routed to this handler only
+ * when the system doesn't support FP/ASIMD.
+ */
+static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
/**
* kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
* instruction executed by a guest
@@ -144,6 +154,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
[ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
+ [ESR_ELx_EC_FP_ASIMD] = handle_no_fpsimd,
};
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 4e92399f7105..5e9052f087f2 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -106,9 +106,16 @@ el1_trap:
* x0: ESR_EC
*/
- /* Guest accessed VFP/SIMD registers, save host, restore Guest */
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ */
+alternative_if_not ARM64_HAS_NO_FPSIMD
cmp x0, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
+alternative_else_nop_endif
mrs x1, tpidr_el2
mov x0, #ARM_EXCEPTION_TRAP
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 0c848c18ca44..75e83dd40d43 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -21,6 +21,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/fpsimd.h>
static bool __hyp_text __fpsimd_enabled_nvhe(void)
{
@@ -76,9 +77,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* traps are only taken to EL2 if the operation would not otherwise
* trap to EL1. Therefore, always make sure that for 32-bit guests,
* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+ * it will cause an exception.
*/
val = vcpu->arch.hcr_el2;
- if (!(val & HCR_RW)) {
+ if (!(val & HCR_RW) && system_supports_fpsimd()) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 5d1cad3ce6d6..d7150e30438a 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,10 +17,7 @@
*/
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
.text
@@ -33,8 +30,7 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x2, x3
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -54,8 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x2
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4fd67ea03bb0..cfe13396085b 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -16,11 +16,8 @@
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
#include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
/*
* Copy from user space to a kernel buffer (alignment handled by the hardware)
@@ -67,12 +64,10 @@
end .req x5
ENTRY(__arch_copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x3, x4
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x3
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index f7292dd08c84..718b1c4e2f85 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -18,11 +18,8 @@
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
#include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
/*
* Copy from user space to user space (alignment handled by the hardware)
@@ -68,12 +65,10 @@
end .req x5
ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x3, x4
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x3
mov x0, #0
ret
ENDPROC(__copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7a7efe255034..e99e31c9acac 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -16,11 +16,8 @@
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
#include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
/*
* Copy to user space from a kernel buffer (alignment handled by the hardware)
@@ -66,12 +63,10 @@
end .req x5
ENTRY(__arch_copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x3, x4
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x3
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 54bb209cae8e..e703fb9defad 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -3,7 +3,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
ioremap.o mmap.o pgd.o mmu.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o
+obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 58b5a906ff78..da9576932322 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -23,6 +23,7 @@
#include <asm/assembler.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
+#include <asm/uaccess.h>
/*
* flush_icache_range(start,end)
@@ -48,6 +49,7 @@ ENTRY(flush_icache_range)
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
+ uaccess_ttbr0_enable x2, x3
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
@@ -69,10 +71,12 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
dsb ish
isb
mov x0, #0
+1:
+ uaccess_ttbr0_disable x1
ret
9:
mov x0, #-EFAULT
- ret
+ b 1b
ENDPROC(flush_icache_range)
ENDPROC(__flush_cache_user_range)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index efcf1f7ef1e4..4c63cb154859 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -221,7 +221,12 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
- cpu_switch_mm(mm->pgd, mm);
+ /*
+ * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
+ * emulating PAN.
+ */
+ if (!system_uses_ttbr0_pan())
+ cpu_switch_mm(mm->pgd, mm);
}
static int asids_init(void)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3f74d0d98de6..aa6c8f834d9e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -938,11 +938,6 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_teardown_dma_ops(struct device *dev)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-
- if (WARN_ON(domain))
- iommu_detach_device(domain, dev);
-
dev->archdata.dma_ops = NULL;
}
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 9c3e75df2180..ca74a2aace42 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -50,6 +50,18 @@ static const struct addr_marker address_markers[] = {
{ -1, NULL },
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ if (m) \
+ seq_printf(m, fmt); \
+})
+
/*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
@@ -62,6 +74,9 @@ struct pg_state {
unsigned long start_address;
unsigned level;
u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
};
struct prot_bits {
@@ -186,10 +201,39 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
s = bits->clear;
if (s)
- seq_printf(st->seq, " %s", s);
+ pt_dump_seq_printf(st->seq, " %s", s);
}
}
+static void note_prot_uxn(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+
+ if ((st->current_prot & PTE_UXN) == PTE_UXN)
+ return;
+
+ WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+ if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY)
+ return;
+ if ((st->current_prot & PTE_PXN) == PTE_PXN)
+ return;
+
+ WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
u64 val)
{
@@ -200,14 +244,16 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
st->level = level;
st->current_prot = prot;
st->start_address = addr;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
} else if (prot != st->current_prot || level != st->level ||
addr >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%016lx-0x%016lx ",
+ note_prot_uxn(st, addr);
+ note_prot_wx(st, addr);
+ pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
@@ -215,17 +261,17 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c %s", delta, *unit,
+ pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
pg_level[st->level].name);
if (pg_level[st->level].bits)
dump_prot(st, pg_level[st->level].bits,
pg_level[st->level].num);
- seq_puts(st->seq, "\n");
+ pt_dump_seq_puts(st->seq, "\n");
}
if (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
@@ -235,7 +281,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
if (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
}
@@ -304,9 +350,8 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
}
}
-static int ptdump_show(struct seq_file *m, void *v)
+void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
{
- struct ptdump_info *info = m->private;
struct pg_state st = {
.seq = m,
.marker = info->markers,
@@ -315,33 +360,16 @@ static int ptdump_show(struct seq_file *m, void *v)
walk_pgd(&st, info->mm, info->base_addr);
note_page(&st, 0, 0, 0);
- return 0;
}
-static int ptdump_open(struct inode *inode, struct file *file)
+static void ptdump_initialize(void)
{
- return single_open(file, ptdump_show, inode->i_private);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-int ptdump_register(struct ptdump_info *info, const char *name)
-{
- struct dentry *pe;
unsigned i, j;
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
if (pg_level[i].bits)
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
-
- pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
- return pe ? 0 : -ENOMEM;
}
static struct ptdump_info kernel_ptdump_info = {
@@ -350,8 +378,30 @@ static struct ptdump_info kernel_ptdump_info = {
.base_addr = VA_START,
};
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = (struct addr_marker[]) {
+ { 0, NULL},
+ { -1, NULL},
+ },
+ .check_wx = true,
+ };
+
+ walk_pgd(&st, &init_mm, 0);
+ note_page(&st, 0, 0, 0);
+ if (st.wx_pages || st.uxn_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
+ st.wx_pages, st.uxn_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
static int ptdump_init(void)
{
- return ptdump_register(&kernel_ptdump_info, "kernel_page_tables");
+ ptdump_initialize();
+ return ptdump_debugfs_register(&kernel_ptdump_info,
+ "kernel_page_tables");
}
device_initcall(ptdump_init);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 0f8788374815..a78a5c401806 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -269,13 +269,19 @@ out:
return fault;
}
-static inline bool is_permission_fault(unsigned int esr)
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
{
unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
- return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
- (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+ if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ return false;
+
+ if (system_uses_ttbr0_pan())
+ return fsc_type == ESR_ELx_FSC_FAULT &&
+ (regs->pstate & PSR_PAN_BIT);
+ else
+ return fsc_type == ESR_ELx_FSC_PERM;
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -315,7 +321,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- if (is_permission_fault(esr) && (addr < USER_DS)) {
+ if (addr < USER_DS && is_permission_fault(esr, regs)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
@@ -507,10 +513,10 @@ static const struct fault_info {
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 8377329d8c97..554a2558c12e 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -25,14 +25,7 @@
#include <asm/cachetype.h>
#include <asm/tlbflush.h>
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- if (vma->vm_flags & VM_EXEC)
- __flush_icache_all();
-}
-
-static void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(void *kaddr, unsigned long len)
{
unsigned long addr = (unsigned long)kaddr;
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 2e49bd252fe7..964b7549af5c 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -51,20 +51,8 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
*pgsize = PAGE_SIZE;
if (!pte_cont(pte))
return 1;
- if (!pgd_present(*pgd)) {
- VM_BUG_ON(!pgd_present(*pgd));
- return 1;
- }
pud = pud_offset(pgd, addr);
- if (!pud_present(*pud)) {
- VM_BUG_ON(!pud_present(*pud));
- return 1;
- }
pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- VM_BUG_ON(!pmd_present(*pmd));
- return 1;
- }
if ((pte_t *)pmd == ptep) {
*pgsize = PMD_SIZE;
return CONT_PMDS;
@@ -212,7 +200,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
/* save the 1st pte to return */
pte = ptep_get_and_clear(mm, addr, cpte);
- for (i = 1; i < ncontig; ++i) {
+ for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
/*
* If HW_AFDBM is enabled, then the HW could
* turn on the dirty bit for any of the page
@@ -250,7 +238,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
pfn = pte_pfn(*cpte);
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
*cpte, &pgsize);
- for (i = 0; i < ncontig; ++i, ++cpte) {
+ for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
changed = ptep_set_access_flags(vma, addr, cpte,
pfn_pte(pfn,
hugeprot),
@@ -273,7 +261,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
cpte = huge_pte_offset(mm, addr);
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
- for (i = 0; i < ncontig; ++i, ++cpte)
+ for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
ptep_set_wrprotect(mm, addr, cpte);
} else {
ptep_set_wrprotect(mm, addr, ptep);
@@ -291,7 +279,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
cpte = huge_pte_offset(vma->vm_mm, addr);
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
*cpte, &pgsize);
- for (i = 0; i < ncontig; ++i, ++cpte)
+ for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
ptep_clear_flush(vma, addr, cpte);
} else {
ptep_clear_flush(vma, addr, ptep);
@@ -323,7 +311,7 @@ __setup("hugepagesz=", setup_hugepagesz);
static __init int add_default_hugepagesz(void)
{
if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
- hugetlb_add_hstate(CONT_PMD_SHIFT);
+ hugetlb_add_hstate(CONT_PTE_SHIFT);
return 0;
}
arch_initcall(add_default_hugepagesz);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 05615a3fdc6f..17243e43184e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,8 +28,6 @@
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -42,6 +40,7 @@
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>
+#include <asm/ptdump.h>
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
@@ -95,11 +94,24 @@ static phys_addr_t __init early_pgtable_alloc(void)
return phys;
}
+static bool pgattr_change_is_safe(u64 old, u64 new)
+{
+ /*
+ * The following mapping attributes may be updated in live
+ * kernel mappings without the need for break-before-make.
+ */
+ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;
+
+ return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0;
+}
+
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void))
+ phys_addr_t (*pgtable_alloc)(void),
+ bool page_mappings_only)
{
+ pgprot_t __prot = prot;
pte_t *pte;
BUG_ON(pmd_sect(*pmd));
@@ -115,8 +127,28 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_set_fixmap_offset(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, prot));
+ pte_t old_pte = *pte;
+
+ /*
+ * Set the contiguous bit for the subsequent group of PTEs if
+ * its size and alignment are appropriate.
+ */
+ if (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0) {
+ if (end - addr >= CONT_PTE_SIZE && !page_mappings_only)
+ __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ else
+ __prot = prot;
+ }
+
+ set_pte(pte, pfn_pte(pfn, __prot));
pfn++;
+
+ /*
+ * After the PTE entry has been populated once, we
+ * only allow updates to the permission attributes.
+ */
+ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
@@ -125,8 +157,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
- bool allow_block_mappings)
+ bool page_mappings_only)
{
+ pgprot_t __prot = prot;
pmd_t *pmd;
unsigned long next;
@@ -146,27 +179,39 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
pmd = pmd_set_fixmap_offset(pud, addr);
do {
+ pmd_t old_pmd = *pmd;
+
next = pmd_addr_end(addr, end);
+
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
- allow_block_mappings) {
- pmd_t old_pmd =*pmd;
- pmd_set_huge(pmd, phys, prot);
+ !page_mappings_only) {
/*
- * Check for previous table entries created during
- * boot (__create_page_tables) and flush them.
+ * Set the contiguous bit for the subsequent group of
+ * PMDs if its size and alignment are appropriate.
*/
- if (!pmd_none(old_pmd)) {
- flush_tlb_all();
- if (pmd_table(old_pmd)) {
- phys_addr_t table = pmd_page_paddr(old_pmd);
- if (!WARN_ON_ONCE(slab_is_available()))
- memblock_free(table, PAGE_SIZE);
- }
+ if (((addr | phys) & ~CONT_PMD_MASK) == 0) {
+ if (end - addr >= CONT_PMD_SIZE)
+ __prot = __pgprot(pgprot_val(prot) |
+ PTE_CONT);
+ else
+ __prot = prot;
}
+ pmd_set_huge(pmd, phys, __prot);
+
+ /*
+ * After the PMD entry has been populated once, we
+ * only allow updates to the permission attributes.
+ */
+ BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
+ pmd_val(*pmd)));
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot, pgtable_alloc);
+ prot, pgtable_alloc,
+ page_mappings_only);
+
+ BUG_ON(pmd_val(old_pmd) != 0 &&
+ pmd_val(old_pmd) != pmd_val(*pmd));
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
@@ -189,7 +234,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
- bool allow_block_mappings)
+ bool page_mappings_only)
{
pud_t *pud;
unsigned long next;
@@ -204,33 +249,28 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
pud = pud_set_fixmap_offset(pgd, addr);
do {
+ pud_t old_pud = *pud;
+
next = pud_addr_end(addr, end);
/*
* For 4K granule only, attempt to put down a 1GB block
*/
- if (use_1G_block(addr, next, phys) && allow_block_mappings) {
- pud_t old_pud = *pud;
+ if (use_1G_block(addr, next, phys) && !page_mappings_only) {
pud_set_huge(pud, phys, prot);
/*
- * If we have an old value for a pud, it will
- * be pointing to a pmd table that we no longer
- * need (from swapper_pg_dir).
- *
- * Look up the old pmd table and free it.
+ * After the PUD entry has been populated once, we
+ * only allow updates to the permission attributes.
*/
- if (!pud_none(old_pud)) {
- flush_tlb_all();
- if (pud_table(old_pud)) {
- phys_addr_t table = pud_page_paddr(old_pud);
- if (!WARN_ON_ONCE(slab_is_available()))
- memblock_free(table, PAGE_SIZE);
- }
- }
+ BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
+ pud_val(*pud)));
} else {
alloc_init_pmd(pud, addr, next, phys, prot,
- pgtable_alloc, allow_block_mappings);
+ pgtable_alloc, page_mappings_only);
+
+ BUG_ON(pud_val(old_pud) != 0 &&
+ pud_val(old_pud) != pud_val(*pud));
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
@@ -242,7 +282,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
- bool allow_block_mappings)
+ bool page_mappings_only)
{
unsigned long addr, length, end, next;
pgd_t *pgd = pgd_offset_raw(pgdir, virt);
@@ -262,7 +302,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
- allow_block_mappings);
+ page_mappings_only);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
@@ -291,17 +331,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
- pgprot_t prot, bool allow_block_mappings)
+ pgprot_t prot, bool page_mappings_only)
{
BUG_ON(mm == &init_mm);
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- pgd_pgtable_alloc, allow_block_mappings);
+ pgd_pgtable_alloc, page_mappings_only);
}
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -314,7 +354,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
}
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
- NULL, !debug_pagealloc_enabled());
+ NULL, debug_pagealloc_enabled());
}
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
@@ -332,7 +372,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
__create_pgd_mapping(pgd, start, __phys_to_virt(start),
end - start, PAGE_KERNEL,
early_pgtable_alloc,
- !debug_pagealloc_enabled());
+ debug_pagealloc_enabled());
return;
}
@@ -345,13 +385,13 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
__phys_to_virt(start),
kernel_start - start, PAGE_KERNEL,
early_pgtable_alloc,
- !debug_pagealloc_enabled());
+ debug_pagealloc_enabled());
if (kernel_end < end)
__create_pgd_mapping(pgd, kernel_end,
__phys_to_virt(kernel_end),
end - kernel_end, PAGE_KERNEL,
early_pgtable_alloc,
- !debug_pagealloc_enabled());
+ debug_pagealloc_enabled());
/*
* Map the linear alias of the [_text, __init_begin) interval as
@@ -361,7 +401,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
*/
__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
kernel_end - kernel_start, PAGE_KERNEL_RO,
- early_pgtable_alloc, !debug_pagealloc_enabled());
+ early_pgtable_alloc, debug_pagealloc_enabled());
}
static void __init map_mem(pgd_t *pgd)
@@ -396,6 +436,11 @@ void mark_rodata_ro(void)
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
+
+ /* flush the TLBs after updating live kernel mappings */
+ flush_tlb_all();
+
+ debug_checkwx();
}
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
@@ -408,7 +453,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(size));
__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
- early_pgtable_alloc, !debug_pagealloc_enabled());
+ early_pgtable_alloc, debug_pagealloc_enabled());
vma->addr = va_start;
vma->phys_addr = pa_start;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 352c73b6a59e..32682be978e0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,11 +70,14 @@ ENTRY(cpu_do_suspend)
mrs x8, mdscr_el1
mrs x9, oslsr_el1
mrs x10, sctlr_el1
+ mrs x11, tpidr_el1
+ mrs x12, sp_el0
stp x2, x3, [x0]
stp x4, xzr, [x0, #16]
stp x5, x6, [x0, #32]
stp x7, x8, [x0, #48]
stp x9, x10, [x0, #64]
+ stp x11, x12, [x0, #80]
ret
ENDPROC(cpu_do_suspend)
@@ -90,6 +93,7 @@ ENTRY(cpu_do_resume)
ldp x6, x8, [x0, #32]
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
+ ldp x13, x14, [x0, #80]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
@@ -112,6 +116,8 @@ ENTRY(cpu_do_resume)
msr mdscr_el1, x10
msr sctlr_el1, x12
+ msr tpidr_el1, x13
+ msr sp_el0, x14
/*
* Restore oslsr_el1 by writing oslar_el1
*/
@@ -136,11 +142,7 @@ ENTRY(cpu_do_switch_mm)
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
-alternative_if ARM64_WORKAROUND_CAVIUM_27456
- ic iallu
- dsb nsh
- isb
-alternative_else_nop_endif
+ post_ttbr0_update_workaround
ret
ENDPROC(cpu_do_switch_mm)
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
new file mode 100644
index 000000000000..eee4d864350c
--- /dev/null
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -0,0 +1,31 @@
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/ptdump.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct ptdump_info *info = m->private;
+ ptdump_walk_pgd(m, info);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, inode->i_private);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+{
+ struct dentry *pe;
+ pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+ return pe ? 0 : -ENOMEM;
+
+}
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 329c8027b0a9..b41aff25426d 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -49,6 +49,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/uaccess.h>
#include <xen/interface/xen.h>
@@ -91,6 +92,20 @@ ENTRY(privcmd_call)
mov x2, x3
mov x3, x4
mov x4, x5
+ /*
+ * Privcmd calls are issued by the userspace. The kernel needs to
+ * enable access to TTBR0_EL1 as the hypervisor would issue stage 1
+ * translations to user memory via AT instructions. Since AT
+ * instructions are not affected by the PAN bit (ARMv8.1), we only
+ * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
+ * is enabled (it implies that hardware UAO and PAN disabled).
+ */
+ uaccess_ttbr0_enable x6, x7
hvc XEN_IMM
+
+ /*
+ * Disable userspace access from kernel once the hyp call completed.
+ */
+ uaccess_ttbr0_disable x6
ret
ENDPROC(privcmd_call);
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 7c75a8d9091a..349dc3e1e52e 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -39,7 +39,7 @@ static struct mm_struct efi_mm = {
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
};
-#ifdef CONFIG_ARM64_PTDUMP
+#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
#include <asm/ptdump.h>
static struct ptdump_info efi_ptdump_info = {
@@ -53,7 +53,7 @@ static struct ptdump_info efi_ptdump_info = {
static int __init ptdump_init(void)
{
- return ptdump_register(&efi_ptdump_info, "efi_page_tables");
+ return ptdump_debugfs_register(&efi_ptdump_info, "efi_page_tables");
}
device_initcall(ptdump_init);
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 19d642eae096..26e1d7fafb1e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -120,11 +120,10 @@ static void gic_redist_wait_for_rwp(void)
}
#ifdef CONFIG_ARM64
-static DEFINE_STATIC_KEY_FALSE(is_cavium_thunderx);
static u64 __maybe_unused gic_read_iar(void)
{
- if (static_branch_unlikely(&is_cavium_thunderx))
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
return gic_read_iar_cavium_thunderx();
else
return gic_read_iar_common();
@@ -905,14 +904,6 @@ static const struct irq_domain_ops partition_domain_ops = {
.select = gic_irq_domain_select,
};
-static void gicv3_enable_quirks(void)
-{
-#ifdef CONFIG_ARM64
- if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_23154))
- static_branch_enable(&is_cavium_thunderx);
-#endif
-}
-
static int __init gic_init_bases(void __iomem *dist_base,
struct redist_region *rdist_regs,
u32 nr_redist_regions,
@@ -935,8 +926,6 @@ static int __init gic_init_bases(void __iomem *dist_base,
gic_data.nr_redist_regions = nr_redist_regions;
gic_data.redist_stride = redist_stride;
- gicv3_enable_quirks();
-
/*
* Find out how many interrupts are supported.
* The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
new file mode 100644
index 000000000000..0d905d8ec553
--- /dev/null
+++ b/include/linux/restart_block.h
@@ -0,0 +1,51 @@
+/*
+ * Common syscall restarting data
+ */
+#ifndef __LINUX_RESTART_BLOCK_H
+#define __LINUX_RESTART_BLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct timespec;
+struct compat_timespec;
+struct pollfd;
+
+/*
+ * System call restart block.
+ */
+struct restart_block {
+ long (*fn)(struct restart_block *);
+ union {
+ /* For futex_wait and futex_wait_requeue_pi */
+ struct {
+ u32 __user *uaddr;
+ u32 val;
+ u32 flags;
+ u32 bitset;
+ u64 time;
+ u32 __user *uaddr2;
+ } futex;
+ /* For nanosleep */
+ struct {
+ clockid_t clockid;
+ struct timespec __user *rmtp;
+#ifdef CONFIG_COMPAT
+ struct compat_timespec __user *compat_rmtp;
+#endif
+ u64 expires;
+ } nanosleep;
+ /* For poll */
+ struct {
+ struct pollfd __user *ufds;
+ int nfds;
+ int has_timeout;
+ unsigned long tv_sec;
+ unsigned long tv_nsec;
+ } poll;
+ };
+};
+
+extern long do_no_restart_syscall(struct restart_block *parm);
+
+#endif /* __LINUX_RESTART_BLOCK_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2873baf5372a..58373875e8ee 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -9,50 +9,17 @@
#include <linux/types.h>
#include <linux/bug.h>
-
-struct timespec;
-struct compat_timespec;
+#include <linux/restart_block.h>
#ifdef CONFIG_THREAD_INFO_IN_TASK
-#define current_thread_info() ((struct thread_info *)current)
-#endif
-
/*
- * System call restart block.
+ * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the
+ * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels,
+ * including <asm/current.h> can cause a circular dependency on some platforms.
*/
-struct restart_block {
- long (*fn)(struct restart_block *);
- union {
- /* For futex_wait and futex_wait_requeue_pi */
- struct {
- u32 __user *uaddr;
- u32 val;
- u32 flags;
- u32 bitset;
- u64 time;
- u32 __user *uaddr2;
- } futex;
- /* For nanosleep */
- struct {
- clockid_t clockid;
- struct timespec __user *rmtp;
-#ifdef CONFIG_COMPAT
- struct compat_timespec __user *compat_rmtp;
+#include <asm/current.h>
+#define current_thread_info() ((struct thread_info *)current)
#endif
- u64 expires;
- } nanosleep;
- /* For poll */
- struct {
- struct pollfd __user *ufds;
- int nfds;
- int has_timeout;
- unsigned long tv_sec;
- unsigned long tv_nsec;
- } poll;
- };
-};
-
-extern long do_no_restart_syscall(struct restart_block *parm);
#include <linux/bitops.h>
#include <asm/thread_info.h>
diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h
index b04000a2296a..2b65efd19a46 100644
--- a/include/uapi/linux/hw_breakpoint.h
+++ b/include/uapi/linux/hw_breakpoint.h
@@ -4,7 +4,11 @@
enum {
HW_BREAKPOINT_LEN_1 = 1,
HW_BREAKPOINT_LEN_2 = 2,
+ HW_BREAKPOINT_LEN_3 = 3,
HW_BREAKPOINT_LEN_4 = 4,
+ HW_BREAKPOINT_LEN_5 = 5,
+ HW_BREAKPOINT_LEN_6 = 6,
+ HW_BREAKPOINT_LEN_7 = 7,
HW_BREAKPOINT_LEN_8 = 8,
};
diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h
index b04000a2296a..2b65efd19a46 100644
--- a/tools/include/uapi/linux/hw_breakpoint.h
+++ b/tools/include/uapi/linux/hw_breakpoint.h
@@ -4,7 +4,11 @@
enum {
HW_BREAKPOINT_LEN_1 = 1,
HW_BREAKPOINT_LEN_2 = 2,
+ HW_BREAKPOINT_LEN_3 = 3,
HW_BREAKPOINT_LEN_4 = 4,
+ HW_BREAKPOINT_LEN_5 = 5,
+ HW_BREAKPOINT_LEN_6 = 6,
+ HW_BREAKPOINT_LEN_7 = 7,
HW_BREAKPOINT_LEN_8 = 8,
};
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 74e533fd4bc5..61b79e8df1f4 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -5,6 +5,9 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
ifeq ($(ARCH),x86)
TEST_PROGS := breakpoint_test
endif
+ifeq ($(ARCH),aarch64)
+TEST_PROGS := breakpoint_test_arm64
+endif
TEST_PROGS += step_after_suspend_test
@@ -13,4 +16,4 @@ all: $(TEST_PROGS)
include ../lib.mk
clean:
- rm -fr breakpoint_test step_after_suspend_test
+ rm -fr breakpoint_test breakpoint_test_arm64 step_after_suspend_test
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
new file mode 100644
index 000000000000..3897e996541e
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Original Code by Pavel Labath <labath@google.com>
+ *
+ * Code modified by Pratyush Anand <panand@redhat.com>
+ * for testing different byte select for each access size.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <errno.h>
+#include <signal.h>
+
+#include "../kselftest.h"
+
+static volatile uint8_t var[96] __attribute__((__aligned__(32)));
+
+static void child(int size, int wr)
+{
+ volatile uint8_t *addr = &var[32 + wr];
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+ perror("ptrace(PTRACE_TRACEME) failed");
+ _exit(1);
+ }
+
+ if (raise(SIGSTOP) != 0) {
+ perror("raise(SIGSTOP) failed");
+ _exit(1);
+ }
+
+ if ((uintptr_t) addr % size) {
+ perror("Wrong address write for the given size\n");
+ _exit(1);
+ }
+ switch (size) {
+ case 1:
+ *addr = 47;
+ break;
+ case 2:
+ *(uint16_t *)addr = 47;
+ break;
+ case 4:
+ *(uint32_t *)addr = 47;
+ break;
+ case 8:
+ *(uint64_t *)addr = 47;
+ break;
+ case 16:
+ __asm__ volatile ("stp x29, x30, %0" : "=m" (addr[0]));
+ break;
+ case 32:
+ __asm__ volatile ("stp q29, q30, %0" : "=m" (addr[0]));
+ break;
+ }
+
+ _exit(0);
+}
+
+static bool set_watchpoint(pid_t pid, int size, int wp)
+{
+ const volatile uint8_t *addr = &var[32 + wp];
+ const int offset = (uintptr_t)addr % 8;
+ const unsigned int byte_mask = ((1 << size) - 1) << offset;
+ const unsigned int type = 2; /* Write */
+ const unsigned int enable = 1;
+ const unsigned int control = byte_mask << 5 | type << 3 | enable;
+ struct user_hwdebug_state dreg_state;
+ struct iovec iov;
+
+ memset(&dreg_state, 0, sizeof(dreg_state));
+ dreg_state.dbg_regs[0].addr = (uintptr_t)(addr - offset);
+ dreg_state.dbg_regs[0].ctrl = control;
+ iov.iov_base = &dreg_state;
+ iov.iov_len = offsetof(struct user_hwdebug_state, dbg_regs) +
+ sizeof(dreg_state.dbg_regs[0]);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_WATCH, &iov) == 0)
+ return true;
+
+ if (errno == EIO) {
+ printf("ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) "
+ "not supported on this hardware\n");
+ ksft_exit_skip();
+ }
+ perror("ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) failed");
+ return false;
+}
+
+static bool run_test(int wr_size, int wp_size, int wr, int wp)
+{
+ int status;
+ siginfo_t siginfo;
+ pid_t pid = fork();
+ pid_t wpid;
+
+ if (pid < 0) {
+ perror("fork() failed");
+ return false;
+ }
+ if (pid == 0)
+ child(wr_size, wr);
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ perror("waitpid() failed");
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ printf("child did not stop\n");
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGSTOP) {
+ printf("child did not stop with SIGSTOP\n");
+ return false;
+ }
+
+ if (!set_watchpoint(pid, wp_size, wp))
+ return false;
+
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+ perror("ptrace(PTRACE_SINGLESTEP) failed");
+ return false;
+ }
+
+ alarm(3);
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ perror("waitpid() failed");
+ return false;
+ }
+ alarm(0);
+ if (WIFEXITED(status)) {
+ printf("child did not single-step\t");
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ printf("child did not stop\n");
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGTRAP) {
+ printf("child did not stop with SIGTRAP\n");
+ return false;
+ }
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo) != 0) {
+ perror("ptrace(PTRACE_GETSIGINFO)");
+ return false;
+ }
+ if (siginfo.si_code != TRAP_HWBKPT) {
+ printf("Unexpected si_code %d\n", siginfo.si_code);
+ return false;
+ }
+
+ kill(pid, SIGKILL);
+ wpid = waitpid(pid, &status, 0);
+ if (wpid != pid) {
+ perror("waitpid() failed");
+ return false;
+ }
+ return true;
+}
+
+static void sigalrm(int sig)
+{
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ bool succeeded = true;
+ struct sigaction act;
+ int wr, wp, size;
+ bool result;
+
+ act.sa_handler = sigalrm;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGALRM, &act, NULL);
+ for (size = 1; size <= 32; size = size*2) {
+ for (wr = 0; wr <= 32; wr = wr + size) {
+ for (wp = wr - size; wp <= wr + size; wp = wp + size) {
+ printf("Test size = %d write offset = %d watchpoint offset = %d\t", size, wr, wp);
+ result = run_test(size, MIN(size, 8), wr, wp);
+ if ((result && wr == wp) || (!result && wr != wp)) {
+ printf("[OK]\n");
+ ksft_inc_pass_cnt();
+ } else {
+ printf("[FAILED]\n");
+ ksft_inc_fail_cnt();
+ succeeded = false;
+ }
+ }
+ }
+ }
+
+ for (size = 1; size <= 32; size = size*2) {
+ printf("Test size = %d write offset = %d watchpoint offset = -8\t", size, -size);
+
+ if (run_test(size, 8, -size, -8)) {
+ printf("[OK]\n");
+ ksft_inc_pass_cnt();
+ } else {
+ printf("[FAILED]\n");
+ ksft_inc_fail_cnt();
+ succeeded = false;
+ }
+ }
+
+ ksft_print_cnts();
+ if (succeeded)
+ ksft_exit_pass();
+ else
+ ksft_exit_fail();
+}