summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 15:03:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 15:03:00 -0800
commit94a855111ed9106971ca2617c5d075269e6aefde (patch)
tree330c762a403cf70c2cdbf12e7394e5e7c2a69a79 /arch
parent93761c93e9da28d8a020777cee2a84133082b477 (diff)
parentf1a033cc6b9eb6d80322008422df3c87aa5d47a0 (diff)
Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Borislav Petkov: - Add the call depth tracking mitigation for Retbleed which has been long in the making. It is a lighterweight software-only fix for Skylake-based cores where enabling IBRS is a big hammer and causes a significant performance impact. What it basically does is, it aligns all kernel functions to 16 bytes boundary and adds a 16-byte padding before the function, objtool collects all functions' locations and when the mitigation gets applied, it patches a call accounting thunk which is used to track the call depth of the stack at any time. When that call depth reaches a magical, microarchitecture-specific value for the Return Stack Buffer, the code stuffs that RSB and avoids its underflow which could otherwise lead to the Intel variant of Retbleed. This software-only solution brings a lot of the lost performance back, as benchmarks suggest: https://lore.kernel.org/all/20220915111039.092790446@infradead.org/ That page above also contains a lot more detailed explanation of the whole mechanism - Implement a new control flow integrity scheme called FineIBT which is based on the software kCFI implementation and uses hardware IBT support where present to annotate and track indirect branches using a hash to validate them - Other misc fixes and cleanups * tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits) x86/paravirt: Use common macro for creating simple asm paravirt functions x86/paravirt: Remove clobber bitmask from .parainstructions x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit x86/Kconfig: Enable kernel IBT by default x86,pm: Force out-of-line memcpy() objtool: Fix weak hole vs prefix symbol objtool: Optimize elf_dirty_reloc_sym() x86/cfi: Add boot time hash randomization x86/cfi: Boot time selection of CFI scheme x86/ibt: Implement FineIBT objtool: Add --cfi to generate the .cfi_sites section x86: Add prefix symbols for function padding objtool: Add option to generate prefix symbols objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf objtool: Slice up elf_create_section_symbol() kallsyms: Revert "Take callthunks into account" x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces x86/retpoline: Fix crash printing warning x86/paravirt: Fix a !PARAVIRT build warning ...
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig24
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/ia64/Makefile2
-rw-r--r--arch/um/kernel/um_arch.c5
-rw-r--r--arch/x86/Kconfig75
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S2
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S4
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S2
-rw-r--r--arch/x86/crypto/crct10dif-pcl-asm_64.S1
-rw-r--r--arch/x86/crypto/poly1305-x86_64-cryptogams.pl1
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S2
-rw-r--r--arch/x86/crypto/serpent-avx2-asm_64.S2
-rw-r--r--arch/x86/crypto/sha1_ni_asm.S1
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S1
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S1
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S1
-rw-r--r--arch/x86/crypto/sha256_ni_asm.S1
-rw-r--r--arch/x86/crypto/sm3-avx-asm_64.S1
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S7
-rw-r--r--arch/x86/crypto/sm4-aesni-avx2-asm_64.S6
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S2
-rw-r--r--arch/x86/entry/entry_32.S4
-rw-r--r--arch/x86/entry/entry_64.S50
-rw-r--r--arch/x86/entry/entry_64_compat.S7
-rw-r--r--arch/x86/entry/thunk_64.S4
-rw-r--r--arch/x86/entry/vdso/Makefile14
-rw-r--r--arch/x86/include/asm/alternative.h68
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/current.h32
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/disabled-features.h9
-rw-r--r--arch/x86/include/asm/hardirq.h3
-rw-r--r--arch/x86/include/asm/irq_stack.h12
-rw-r--r--arch/x86/include/asm/linkage.h63
-rw-r--r--arch/x86/include/asm/nospec-branch.h176
-rw-r--r--arch/x86/include/asm/paravirt.h17
-rw-r--r--arch/x86/include/asm/paravirt_types.h34
-rw-r--r--arch/x86/include/asm/preempt.h27
-rw-r--r--arch/x86/include/asm/processor.h11
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h47
-rw-r--r--arch/x86/include/asm/smp.h12
-rw-r--r--arch/x86/include/asm/text-patching.h1
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/alternative.c529
-rw-r--r--arch/x86/kernel/asm-offsets.c5
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/callthunks.c388
-rw-r--r--arch/x86/kernel/cpu/Makefile3
-rw-r--r--arch/x86/kernel/cpu/bugs.c34
-rw-r--r--arch/x86/kernel/cpu/common.c97
-rw-r--r--arch/x86/kernel/dumpstack_32.c4
-rw-r--r--arch/x86/kernel/dumpstack_64.c2
-rw-r--r--arch/x86/kernel/ftrace.c20
-rw-r--r--arch/x86/kernel/ftrace_64.S37
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/irq_32.c13
-rw-r--r--arch/x86/kernel/irq_64.c6
-rw-r--r--arch/x86/kernel/kprobes/core.c1
-rw-r--r--arch/x86/kernel/kvm.c18
-rw-r--r--arch/x86/kernel/module.c47
-rw-r--r--arch/x86/kernel/paravirt.c21
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S5
-rw-r--r--arch/x86/kernel/setup_percpu.c7
-rw-r--r--arch/x86/kernel/smpboot.c10
-rw-r--r--arch/x86/kernel/static_call.c3
-rw-r--r--arch/x86/kernel/traps.c4
-rw-r--r--arch/x86/kernel/unwind_orc.c21
-rw-r--r--arch/x86/kernel/vmlinux.lds.S37
-rw-r--r--arch/x86/kvm/svm/vmenter.S1
-rw-r--r--arch/x86/lib/error-inject.c1
-rw-r--r--arch/x86/lib/putuser.S62
-rw-r--r--arch/x86/lib/retpoline.S107
-rw-r--r--arch/x86/net/bpf_jit_comp.c45
-rw-r--r--arch/x86/power/hibernate.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c2
79 files changed, 1902 insertions, 399 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 2d0e7099eb3f..a3c47c2a79cd 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1438,4 +1438,28 @@ source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
+config FUNCTION_ALIGNMENT_4B
+ bool
+
+config FUNCTION_ALIGNMENT_8B
+ bool
+
+config FUNCTION_ALIGNMENT_16B
+ bool
+
+config FUNCTION_ALIGNMENT_32B
+ bool
+
+config FUNCTION_ALIGNMENT_64B
+ bool
+
+config FUNCTION_ALIGNMENT
+ int
+ default 64 if FUNCTION_ALIGNMENT_64B
+ default 32 if FUNCTION_ALIGNMENT_32B
+ default 16 if FUNCTION_ALIGNMENT_16B
+ default 8 if FUNCTION_ALIGNMENT_8B
+ default 4 if FUNCTION_ALIGNMENT_4B
+ default 0
+
endmenu
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c6e06cdc738f..d7e4a24e8644 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -63,6 +63,7 @@ config IA64
select NUMA if !FLATMEM
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select ZONE_DMA32
+ select FUNCTION_ALIGNMENT_32B
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 56c4bb276b6e..d553ab7022fe 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
EXTRA :=
cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
- -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
+ -frename-registers -fno-optimize-sibling-calls
KBUILD_CFLAGS_KERNEL := -mconstant-gp
GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8adf8e89b255..786b44dc20c9 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
{
}
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+ s32 *start_cfi, s32 *end_cfi)
+{
+}
+
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9f8c03b347f9..61b4c7bcd4d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -292,6 +292,8 @@ config X86
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
+ select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16
+ select FUNCTION_ALIGNMENT_4B
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
@@ -1855,7 +1857,7 @@ config CC_HAS_IBT
config X86_KERNEL_IBT
prompt "Indirect Branch Tracking"
- bool
+ def_bool y
depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
depends on !LD_IS_LLD || LLD_VERSION >= 140000
@@ -2492,6 +2494,46 @@ config CC_HAS_SLS
config CC_HAS_RETURN_THUNK
def_bool $(cc-option,-mfunction-return=thunk-extern)
+config CC_HAS_ENTRY_PADDING
+ def_bool $(cc-option,-fpatchable-function-entry=16,16)
+
+config FUNCTION_PADDING_CFI
+ int
+ default 59 if FUNCTION_ALIGNMENT_64B
+ default 27 if FUNCTION_ALIGNMENT_32B
+ default 11 if FUNCTION_ALIGNMENT_16B
+ default 3 if FUNCTION_ALIGNMENT_8B
+ default 0
+
+# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG
+# except Kconfig can't do arithmetic :/
+config FUNCTION_PADDING_BYTES
+ int
+ default FUNCTION_PADDING_CFI if CFI_CLANG
+ default FUNCTION_ALIGNMENT
+
+config CALL_PADDING
+ def_bool n
+ depends on CC_HAS_ENTRY_PADDING && OBJTOOL
+ select FUNCTION_ALIGNMENT_16B
+
+config FINEIBT
+ def_bool y
+ depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
+ select CALL_PADDING
+
+config HAVE_CALL_THUNKS
+ def_bool y
+ depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
+
+config CALL_THUNKS
+ def_bool n
+ select CALL_PADDING
+
+config PREFIX_SYMBOLS
+ def_bool y
+ depends on CALL_PADDING && !CFI_CLANG
+
menuconfig SPECULATION_MITIGATIONS
bool "Mitigations for speculative execution vulnerabilities"
default y
@@ -2543,6 +2585,37 @@ config CPU_UNRET_ENTRY
help
Compile the kernel with support for the retbleed=unret mitigation.
+config CALL_DEPTH_TRACKING
+ bool "Mitigate RSB underflow with call depth tracking"
+ depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
+ select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
+ select CALL_THUNKS
+ default y
+ help
+ Compile the kernel with call depth tracking to mitigate the Intel
+ SKL Return-Speculation-Buffer (RSB) underflow issue. The
+ mitigation is off by default and needs to be enabled on the
+ kernel command line via the retbleed=stuff option. For
+ non-affected systems the overhead of this option is marginal as
+ the call depth tracking is using run-time generated call thunks
+ in a compiler generated padding area and call patching. This
+ increases text size by ~5%. For non affected systems this space
+ is unused. On affected SKL systems this results in a significant
+ performance gain over the IBRS mitigation.
+
+config CALL_THUNKS_DEBUG
+ bool "Enable call thunks and call depth tracking debugging"
+ depends on CALL_DEPTH_TRACKING
+ select FUNCTION_ALIGNMENT_32B
+ default n
+ help
+ Enable call/ret counters for imbalance detection and build in
+ a noisy dmesg about callthunks generation and call patching for
+ trouble shooting. The debug prints need to be enabled on the
+ kernel command line with 'debug-callthunks'.
+ Only enable this, when you are debugging call thunks as this
+ creates a noticable runtime overhead. If unsure say N.
+
config CPU_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
depends on CPU_SUP_AMD && X86_64
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 415a5d138de4..a3a07df8a609 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -208,6 +208,12 @@ ifdef CONFIG_SLS
KBUILD_CFLAGS += -mharden-sls=all
endif
+ifdef CONFIG_CALL_PADDING
+PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
+KBUILD_CFLAGS += $(PADDING_CFLAGS)
+export PADDING_CFLAGS
+endif
+
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d4c4281db635..a75712991df3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -38,6 +38,14 @@
#include "pgtable.h"
/*
+ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
+ * in assembly errors due to trying to move .org backward due to the excessive
+ * alignment.
+ */
+#undef __ALIGN
+#define __ALIGN .balign 16, 0x90
+
+/*
* Locally defined symbols should be marked hidden:
*/
.hidden _bss
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2e1658ddbe1a..4a30618281ec 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
-.align 8
SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
/* input:
* %rdi: ctx, CTX
@@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
jmp .Lenc_done;
SYM_FUNC_END(__camellia_enc_blk16)
-.align 8
SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
/* input:
* %rdi: ctx, CTX
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 0e4e9abbf4de..deaf62aa73a6 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -221,7 +221,6 @@
* Size optimization... with inlined roundsm32 binary would be over 5 times
* larger and would only marginally faster.
*/
-.align 8
SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
@@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
RET;
SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
-.align 8
SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
@@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
-.align 8
SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
/* input:
* %rdi: ctx, CTX
@@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
jmp .Lenc_done;
SYM_FUNC_END(__camellia_enc_blk32)
-.align 8
SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
/* input:
* %rdi: ctx, CTX
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b258af420c92..0326a01503c3 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -208,7 +208,6 @@
.text
-.align 16
SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
/* input:
* %rdi: ctx
@@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
RET;
SYM_FUNC_END(__cast5_enc_blk16)
-.align 16
SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
/* input:
* %rdi: ctx
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index 721474abfb71..5286db5b8165 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -94,7 +94,6 @@
#
# Assumes len >= 16.
#
-.align 16
SYM_FUNC_START(crc_t10dif_pcl)
movdqa .Lbswap_mask(%rip), BSWAP_MASK
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 2077ce7a5647..b9abcd79c1f4 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -108,7 +108,6 @@ if (!$kernel) {
sub declare_function() {
my ($name, $align, $nargs) = @_;
if($kernel) {
- $code .= ".align $align\n";
$code .= "SYM_FUNC_START($name)\n";
$code .= ".L$name:\n";
} else {
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 82f2313f512b..97e283621851 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -550,7 +550,6 @@
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
/* input:
* %rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
RET;
SYM_FUNC_END(__serpent_enc_blk8_avx)
-.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
/* input:
* %rdi: ctx, CTX
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index 8ea34c9b9316..6d60c50593a9 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -550,7 +550,6 @@
#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-.align 8
SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
/* input:
* %rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
RET;
SYM_FUNC_END(__serpent_enc_blk16)
-.align 8
SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
/* input:
* %rdi: ctx, CTX
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 3cae5a1bb3d6..cade913d4882 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -93,7 +93,6 @@
* numBlocks: Number of blocks to process
*/
.text
-.align 32
SYM_TYPED_FUNC_START(sha1_ni_transform)
push %rbp
mov %rsp, %rbp
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 06ea30c20828..5555b5d5215a 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -348,7 +348,6 @@ a = TMP_
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_avx)
-.align 32
pushq %rbx
pushq %r12
pushq %r13
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 2d2be531a11e..3eada9416852 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -525,7 +525,6 @@ STACK_SIZE = _CTX + _CTX_SIZE
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_rorx)
-.align 32
pushq %rbx
pushq %r12
pushq %r13
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index 7db28839108d..959288eecc68 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -357,7 +357,6 @@ a = TMP_
########################################################################
.text
SYM_TYPED_FUNC_START(sha256_transform_ssse3)
-.align 32
pushq %rbx
pushq %r12
pushq %r13
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index 47f93937f798..537b6dcd7ed8 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -97,7 +97,6 @@
*/
.text
-.align 32
SYM_TYPED_FUNC_START(sha256_ni_transform)
shl $6, NUM_BLKS /* convert to bytes */
diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
index 8fc5ac681fd6..503bab450a91 100644
--- a/arch/x86/crypto/sm3-avx-asm_64.S
+++ b/arch/x86/crypto/sm3-avx-asm_64.S
@@ -328,7 +328,6 @@
* void sm3_transform_avx(struct sm3_state *state,
* const u8 *data, int nblocks);
*/
-.align 16
SYM_TYPED_FUNC_START(sm3_transform_avx)
/* input:
* %rdi: ctx, CTX
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
index 22b6560eb9e1..e2668d2fe6ce 100644
--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -140,13 +140,11 @@
.text
-.align 16
/*
* void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
* const u8 *src, int nblocks)
*/
-.align 8
SYM_FUNC_START(sm4_aesni_avx_crypt4)
/* input:
* %rdi: round key array, CTX
@@ -250,7 +248,6 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4)
RET;
SYM_FUNC_END(sm4_aesni_avx_crypt4)
-.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
/* input:
* %rdi: round key array, CTX
@@ -364,7 +361,6 @@ SYM_FUNC_END(__sm4_crypt_blk8)
* void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
* const u8 *src, int nblocks)
*/
-.align 8
SYM_FUNC_START(sm4_aesni_avx_crypt8)
/* input:
* %rdi: round key array, CTX
@@ -420,7 +416,6 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8)
* void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
-.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
/* input:
* %rdi: round key array, CTX
@@ -495,7 +490,6 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
* void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
-.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
/* input:
* %rdi: round key array, CTX
@@ -545,7 +539,6 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
* void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
-.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
/* input:
* %rdi: round key array, CTX
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
index 23ee39a8ada8..98ede9459287 100644
--- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -154,9 +154,6 @@
.long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
.text
-.align 16
-
-.align 8
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
/* input:
* %rdi: round key array, CTX
@@ -282,7 +279,6 @@ SYM_FUNC_END(__sm4_crypt_blk16)
* void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
-.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
/* input:
* %rdi: round key array, CTX
@@ -395,7 +391,6 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
* void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
-.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
/* input:
* %rdi: round key array, CTX
@@ -449,7 +444,6 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
* void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
* const u8 *src, u8 *iv)
*/
-.align 8
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
/* input:
* %rdi: round key array, CTX
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 31f9b2ec3857..12fde271cd3f 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -228,7 +228,6 @@
vpxor x2, wkey, x2; \
vpxor x3, wkey, x3;
-.align 8
SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
/* input:
* %rdi: ctx, CTX
@@ -270,7 +269,6 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
RET;
SYM_FUNC_END(__twofish_enc_blk8)
-.align 8
SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
/* input:
* %rdi: ctx, CTX
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index e309e7156038..91397f58ac30 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1181,7 +1181,7 @@ SYM_CODE_START(asm_exc_nmi)
* is using the thread stack right now, so it's safe for us to use it.
*/
movl %esp, %ebx
- movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp
call exc_nmi
movl %ebx, %esp
@@ -1243,7 +1243,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
- movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
+ movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
call make_task_dead
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9953d966d124..15739a2c0983 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64)
/* tss.sp2 is scratch space. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -252,7 +252,7 @@ SYM_FUNC_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+ movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
#endif
/*
@@ -284,9 +284,11 @@ SYM_FUNC_END(__switch_to_asm)
* r12: kernel thread arg
*/
.pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
+ __FUNC_ALIGN
+SYM_CODE_START_NOALIGN(ret_from_fork)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR // copy_thread
+ CALL_DEPTH_ACCOUNT
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@@ -326,11 +328,12 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
-SYM_CODE_START_LOCAL(xen_error_entry)
+SYM_CODE_START(xen_error_entry)
+ ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
- UNTRAIN_RET
+ UNTRAIN_RET_FROM_CALL
RET
SYM_CODE_END(xen_error_entry)
@@ -600,13 +603,13 @@ SYM_CODE_END(\asmsym)
* shared between 32 and 64 bit and emit the __irqentry_text_* markers
* so the stacktrace boundary checks work.
*/
- .align 16
+ __ALIGN
.globl __irqentry_text_start
__irqentry_text_start:
#include <asm/idtentry.h>
- .align 16
+ __ALIGN
.globl __irqentry_text_end
__irqentry_text_end:
ANNOTATE_NOENDBR
@@ -828,7 +831,8 @@ EXPORT_SYMBOL(asm_load_gs_index)
*
* C calling convention: exc_xen_hypervisor_callback(struct *pt_regs)
*/
-SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
+ __FUNC_ALIGN
+SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback)
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@@ -856,7 +860,8 @@ SYM_CODE_END(exc_xen_hypervisor_callback)
* We distinguish between categories by comparing each saved segment register
* with its current contents: any discrepancy means we in category 1.
*/
-SYM_CODE_START(xen_failsafe_callback)
+ __FUNC_ALIGN
+SYM_CODE_START_NOALIGN(xen_failsafe_callback)
UNWIND_HINT_EMPTY
ENDBR
movl %ds, %ecx
@@ -903,7 +908,8 @@ SYM_CODE_END(xen_failsafe_callback)
* R14 - old CR3
* R15 - old SPEC_CTRL
*/
-SYM_CODE_START_LOCAL(paranoid_entry)
+SYM_CODE_START(paranoid_entry)
+ ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@@ -972,7 +978,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
* CR3 above, keep the old value in a callee saved register.
*/
IBRS_ENTER save_reg=%r15
- UNTRAIN_RET
+ UNTRAIN_RET_FROM_CALL
RET
SYM_CODE_END(paranoid_entry)
@@ -1038,7 +1044,8 @@ SYM_CODE_END(paranoid_exit)
/*
* Switch GS and CR3 if needed.
*/
-SYM_CODE_START_LOCAL(error_entry)
+SYM_CODE_START(error_entry)
+ ANNOTATE_NOENDBR
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
@@ -1056,14 +1063,11 @@ SYM_CODE_START_LOCAL(error_entry)
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
IBRS_ENTER
- UNTRAIN_RET
+ UNTRAIN_RET_FROM_CALL
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
-.Lerror_entry_from_usermode_after_swapgs:
-
/* Put us onto the real thread stack. */
- call sync_regs
- RET
+ jmp sync_regs
/*
* There are two places in the kernel that can potentially fault with
@@ -1094,6 +1098,7 @@ SYM_CODE_START_LOCAL(error_entry)
*/
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
+ CALL_DEPTH_ACCOUNT
leaq 8(%rsp), %rax /* return pt_regs pointer */
ANNOTATE_UNRET_END
RET
@@ -1112,7 +1117,7 @@ SYM_CODE_START_LOCAL(error_entry)
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
IBRS_ENTER
- UNTRAIN_RET
+ UNTRAIN_RET_FROM_CALL
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1121,7 +1126,7 @@ SYM_CODE_START_LOCAL(error_entry)
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
call fixup_bad_iret
mov %rax, %rdi
- jmp .Lerror_entry_from_usermode_after_swapgs
+ jmp sync_regs
SYM_CODE_END(error_entry)
SYM_CODE_START_LOCAL(error_return)
@@ -1206,7 +1211,7 @@ SYM_CODE_START(asm_exc_nmi)
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
@@ -1516,12 +1521,13 @@ SYM_CODE_END(ignore_sysret)
#endif
.pushsection .text, "ax"
-SYM_CODE_START(rewind_stack_and_make_dead)
+ __FUNC_ALIGN
+SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
+ movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 59b93901660d..70150298f8bd 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -58,7 +58,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
popq %rax
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
@@ -128,7 +128,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
popfq
jmp .Lsysenter_flags_fixed
SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
- ANNOTATE_NOENDBR // is_sysenter_singlestep
SYM_CODE_END(entry_SYSENTER_compat)
/*
@@ -191,7 +190,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Switch to the kernel stack */
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -332,7 +331,7 @@ SYM_CODE_START(entry_INT80_compat)
ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
movq %rsp, %rax
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
pushq 5*8(%rax) /* regs->ss */
pushq 4*8(%rax) /* regs->rsp */
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index f38b07d2768b..5e37f41e5f14 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -11,7 +11,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func
-SYM_FUNC_START_NOALIGN(\name)
+SYM_FUNC_START(\name)
pushq %rbp
movq %rsp, %rbp
@@ -36,7 +36,7 @@ SYM_FUNC_END(\name)
EXPORT_SYMBOL(preempt_schedule_thunk)
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
-SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
+SYM_CODE_START_LOCAL(__thunk_restore)
popq %r11
popq %r10
popq %r9
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 3e88b9df8c8f..838613ac15b8 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -33,11 +33,12 @@ vobjs32-y += vdso32/vclock_gettime.o
vobjs-$(CONFIG_X86_SGX) += vsgx.o
# files to link into kernel
-obj-y += vma.o extable.o
-KASAN_SANITIZE_vma.o := y
-UBSAN_SANITIZE_vma.o := y
-KCSAN_SANITIZE_vma.o := y
-OBJECT_FILES_NON_STANDARD_vma.o := n
+obj-y += vma.o extable.o
+KASAN_SANITIZE_vma.o := y
+UBSAN_SANITIZE_vma.o := y
+KCSAN_SANITIZE_vma.o := y
+OBJECT_FILES_NON_STANDARD_vma.o := n
+OBJECT_FILES_NON_STANDARD_extable.o := n
# vDSO images to build
vdso_img-$(VDSO64-y) += 64
@@ -94,7 +95,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
endif
endif
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
#
@@ -157,6 +158,7 @@ KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
KBUILD_CFLAGS_32 += -fno-stack-protector
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 9542c582d546..7659217f4d49 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -78,8 +78,43 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
extern void apply_returns(s32 *start, s32 *end);
extern void apply_ibt_endbr(s32 *start, s32 *end);
+extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
+ s32 *start_cfi, s32 *end_cfi);
struct module;
+struct paravirt_patch_site;
+
+struct callthunk_sites {
+ s32 *call_start, *call_end;
+ struct paravirt_patch_site *pv_start, *pv_end;
+};
+
+#ifdef CONFIG_CALL_THUNKS
+extern void callthunks_patch_builtin_calls(void);
+extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
+ struct module *mod);
+extern void *callthunks_translate_call_dest(void *dest);
+extern bool is_callthunk(void *addr);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+#else
+static __always_inline void callthunks_patch_builtin_calls(void) {}
+static __always_inline void
+callthunks_patch_module_calls(struct callthunk_sites *sites,
+ struct module *mod) {}
+static __always_inline void *callthunks_translate_call_dest(void *dest)
+{
+ return dest;
+}
+static __always_inline bool is_callthunk(void *addr)
+{
+ return false;
+}
+static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
+ void *func)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_SMP
extern void alternatives_smp_module_add(struct module *mod, char *name,
@@ -347,6 +382,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define old_len 141b-140b
#define new_len1 144f-143f
#define new_len2 145f-144f
+#define new_len3 146f-145f
/*
* gas compatible max based on the idea from:
@@ -354,7 +390,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
*
* The additional "-" is needed because gas uses a "true" value of -1.
*/
-#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
/*
@@ -366,13 +403,36 @@ static inline int alternatives_text_reserved(void *start, void *end)
140:
\oldinstr
141:
- .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+ .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_2(new_len1, new_len2) - (old_len)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr1
+144:
+ \newinstr2
+145:
+ .popsection
+.endm
+
+.macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3
+140:
+ \oldinstr
+141:
+ .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
+ (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
+ altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f
.popsection
.pushsection .altinstr_replacement,"ax"
@@ -381,6 +441,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
144:
\newinstr2
145:
+ \newinstr3
+146:
.popsection
.endm
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c9f4730bb113..2dd2691b5ee1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -305,8 +305,7 @@
#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
-
-
+#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 3e204e6140b5..a1168e7b69e5 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -3,16 +3,42 @@
#define _ASM_X86_CURRENT_H
#include <linux/compiler.h>
-#include <asm/percpu.h>
#ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+#include <asm/percpu.h>
+
struct task_struct;
-DECLARE_PER_CPU(struct task_struct *, current_task);
+struct pcpu_hot {
+ union {
+ struct {
+ struct task_struct *current_task;
+ int preempt_count;
+ int cpu_number;
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+ u64 call_depth;
+#endif
+ unsigned long top_of_stack;
+ void *hardirq_stack_ptr;
+ u16 softirq_pending;
+#ifdef CONFIG_X86_64
+ bool hardirq_stack_inuse;
+#else
+ void *softirq_stack_ptr;
+#endif
+ };
+ u8 pad[64];
+ };
+};
+static_assert(sizeof(struct pcpu_hot) == 64);
+
+DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
static __always_inline struct task_struct *get_current(void)
{
- return this_cpu_read_stable(current_task);
+ return this_cpu_read_stable(pcpu_hot.current_task);
}
#define current get_current()
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index cfdf307ddc01..b049d950612f 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -2,8 +2,8 @@
#ifndef _ASM_X86_DEBUGREG_H
#define _ASM_X86_DEBUGREG_H
-
#include <linux/bug.h>
+#include <linux/percpu.h>
#include <uapi/asm/debugreg.h>
DECLARE_PER_CPU(unsigned long, cpu_dr7);
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c862552d7d6d..c44b56f7ffba 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -69,6 +69,12 @@
# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
#endif
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+# define DISABLE_CALL_DEPTH_TRACKING 0
+#else
+# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -107,7 +113,8 @@
#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
+ DISABLE_CALL_DEPTH_TRACKING)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 275e7fd20310..66837b8c67f1 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -3,9 +3,9 @@
#define _ASM_X86_HARDIRQ_H
#include <linux/threads.h>
+#include <asm/current.h>
typedef struct {
- u16 __softirq_pending;
#if IS_ENABLED(CONFIG_KVM_INTEL)
u8 kvm_cpu_l1tf_flush_l1d;
#endif
@@ -60,6 +60,7 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
+#define local_softirq_pending_ref pcpu_hot.softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
static inline void kvm_set_cpu_l1tf_flush_l1d(void)
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 147cb8fdda92..798183867d78 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -116,7 +116,7 @@
ASM_CALL_ARG2
#define call_on_irqstack(func, asm_call, argconstr...) \
- call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
+ call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \
func, asm_call, argconstr)
/* Macros to assert type correctness for run_*_on_irqstack macros */
@@ -135,7 +135,7 @@
* User mode entry and interrupt on the irq stack do not \
* switch stacks. If from user mode the task stack is empty. \
*/ \
- if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \
+ if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \
irq_enter_rcu(); \
func(c_args); \
irq_exit_rcu(); \
@@ -146,9 +146,9 @@
* places. Invoke the stack switch macro with the call \
* sequence which matches the above direct invocation. \
*/ \
- __this_cpu_write(hardirq_stack_inuse, true); \
+ __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
call_on_irqstack(func, asm_call, constr); \
- __this_cpu_write(hardirq_stack_inuse, false); \
+ __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
} \
}
@@ -212,9 +212,9 @@
*/
#define do_softirq_own_stack() \
{ \
- __this_cpu_write(hardirq_stack_inuse, true); \
+ __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
- __this_cpu_write(hardirq_stack_inuse, false); \
+ __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
}
#endif
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f484d656d34e..dd9b8118f784 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -12,13 +12,26 @@
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
#endif /* CONFIG_X86_32 */
-#ifdef __ASSEMBLY__
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
-#define __ALIGN .p2align 4, 0x90
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
#define __ALIGN_STR __stringify(__ALIGN)
+
+#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90;
+#else
+#define FUNCTION_PADDING
+#endif
+
+#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO)
+# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING
+#else
+# define __FUNC_ALIGN __ALIGN
#endif
+#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN)
+#define SYM_F_ALIGN __FUNC_ALIGN
+
+#ifdef __ASSEMBLY__
+
#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define RET jmp __x86_return_thunk
#else /* CONFIG_RETPOLINE */
@@ -43,11 +56,45 @@
#endif /* __ASSEMBLY__ */
+/*
+ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
+ * CFI symbol layout changes.
+ *
+ * Without CALL_THUNKS:
+ *
+ * .align FUNCTION_ALIGNMENT
+ * __cfi_##name:
+ * .skip FUNCTION_PADDING, 0x90
+ * .byte 0xb8
+ * .long __kcfi_typeid_##name
+ * name:
+ *
+ * With CALL_THUNKS:
+ *
+ * .align FUNCTION_ALIGNMENT
+ * __cfi_##name:
+ * .byte 0xb8
+ * .long __kcfi_typeid_##name
+ * .skip FUNCTION_PADDING, 0x90
+ * name:
+ *
+ * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
+ */
+
+#ifdef CONFIG_CALL_PADDING
+#define CFI_PRE_PADDING
+#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
+#else
+#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90;
+#define CFI_POST_PADDING
+#endif
+
#define __CFI_TYPE(name) \
SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \
- .fill 11, 1, 0x90 ASM_NL \
+ CFI_PRE_PADDING \
.byte 0xb8 ASM_NL \
.long __kcfi_typeid_##name ASM_NL \
+ CFI_POST_PADDING \
SYM_FUNC_END(__cfi_##name)
/* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
@@ -57,7 +104,7 @@
/* SYM_FUNC_START -- use for global functions */
#define SYM_FUNC_START(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
ENDBR
/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
@@ -67,7 +114,7 @@
/* SYM_FUNC_START_LOCAL -- use for local functions */
#define SYM_FUNC_START_LOCAL(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
+ SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \
ENDBR
/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
@@ -77,7 +124,7 @@
/* SYM_FUNC_START_WEAK -- use for weak functions */
#define SYM_FUNC_START_WEAK(name) \
- SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
+ SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \
ENDBR
/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index dfdb103ae4f6..771b0a2b7a34 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -12,8 +12,104 @@
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
+#include <asm/current.h>
-#define RETPOLINE_THUNK_SIZE 32
+/*
+ * Call depth tracking for Intel SKL CPUs to address the RSB underflow
+ * issue in software.
+ *
+ * The tracking does not use a counter. It uses uses arithmetic shift
+ * right on call entry and logical shift left on return.
+ *
+ * The depth tracking variable is initialized to 0x8000.... when the call
+ * depth is zero. The arithmetic shift right sign extends the MSB and
+ * saturates after the 12th call. The shift count is 5 for both directions
+ * so the tracking covers 12 nested calls.
+ *
+ * Call
+ * 0: 0x8000000000000000 0x0000000000000000
+ * 1: 0xfc00000000000000 0xf000000000000000
+ * ...
+ * 11: 0xfffffffffffffff8 0xfffffffffffffc00
+ * 12: 0xffffffffffffffff 0xffffffffffffffe0
+ *
+ * After a return buffer fill the depth is credited 12 calls before the
+ * next stuffing has to take place.
+ *
+ * There is a inaccuracy for situations like this:
+ *
+ * 10 calls
+ * 5 returns
+ * 3 calls
+ * 4 returns
+ * 3 calls
+ * ....
+ *
+ * The shift count might cause this to be off by one in either direction,
+ * but there is still a cushion vs. the RSB depth. The algorithm does not
+ * claim to be perfect and it can be speculated around by the CPU, but it
+ * is considered that it obfuscates the problem enough to make exploitation
+ * extremly difficult.
+ */
+#define RET_DEPTH_SHIFT 5
+#define RSB_RET_STUFF_LOOPS 16
+#define RET_DEPTH_INIT 0x8000000000000000ULL
+#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL
+#define RET_DEPTH_CREDIT 0xffffffffffffffffULL
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+# define CALL_THUNKS_DEBUG_INC_CALLS \
+ incq %gs:__x86_call_count;
+# define CALL_THUNKS_DEBUG_INC_RETS \
+ incq %gs:__x86_ret_count;
+# define CALL_THUNKS_DEBUG_INC_STUFFS \
+ incq %gs:__x86_stuffs_count;
+# define CALL_THUNKS_DEBUG_INC_CTXSW \
+ incq %gs:__x86_ctxsw_count;
+#else
+# define CALL_THUNKS_DEBUG_INC_CALLS
+# define CALL_THUNKS_DEBUG_INC_RETS
+# define CALL_THUNKS_DEBUG_INC_STUFFS
+# define CALL_THUNKS_DEBUG_INC_CTXSW
+#endif
+
+#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
+
+#include <asm/asm-offsets.h>
+
+#define CREDIT_CALL_DEPTH \
+ movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define ASM_CREDIT_CALL_DEPTH \
+ movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define RESET_CALL_DEPTH \
+ mov $0x80, %rax; \
+ shl $56, %rax; \
+ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define RESET_CALL_DEPTH_FROM_CALL \
+ mov $0xfc, %rax; \
+ shl $56, %rax; \
+ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#define INCREMENT_CALL_DEPTH \
+ sarq $5, %gs:pcpu_hot + X86_call_depth; \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#define ASM_INCREMENT_CALL_DEPTH \
+ sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
+ CALL_THUNKS_DEBUG_INC_CALLS
+
+#else
+#define CREDIT_CALL_DEPTH
+#define ASM_CREDIT_CALL_DEPTH
+#define RESET_CALL_DEPTH
+#define INCREMENT_CALL_DEPTH
+#define ASM_INCREMENT_CALL_DEPTH
+#define RESET_CALL_DEPTH_FROM_CALL
+#endif
/*
* Fill the CPU return stack buffer.
@@ -32,6 +128,7 @@
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
*/
+#define RETPOLINE_THUNK_SIZE 32
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
@@ -60,7 +157,9 @@
dec reg; \
jnz 771b; \
/* barrier for jnz misprediction */ \
- lfence;
+ lfence; \
+ ASM_CREDIT_CALL_DEPTH \
+ CALL_THUNKS_DEBUG_INC_CTXSW
#else
/*
* i386 doesn't unconditionally have LFENCE, as such it can't
@@ -185,11 +284,32 @@
* where we have a stack but before any RET instruction.
*/
.macro UNTRAIN_RET
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+ defined(CONFIG_CALL_DEPTH_TRACKING)
ANNOTATE_UNRET_END
- ALTERNATIVE_2 "", \
- CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+ ALTERNATIVE_3 "", \
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
+ __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+.macro UNTRAIN_RET_FROM_CALL
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+ defined(CONFIG_CALL_DEPTH_TRACKING)
+ ANNOTATE_UNRET_END
+ ALTERNATIVE_3 "", \
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
+ __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+
+.macro CALL_DEPTH_ACCOUNT
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+ ALTERNATIVE "", \
+ __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
@@ -203,11 +323,45 @@
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
extern void __x86_return_thunk(void);
extern void zen_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_CALL_THUNKS
+extern void (*x86_return_thunk)(void);
+#else
+#define x86_return_thunk (&__x86_return_thunk)
+#endif
+
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+extern void __x86_return_skl(void);
+
+static inline void x86_set_skl_return_thunk(void)
+{
+ x86_return_thunk = &__x86_return_skl;
+}
+
+#define CALL_DEPTH_ACCOUNT \
+ ALTERNATIVE("", \
+ __stringify(INCREMENT_CALL_DEPTH), \
+ X86_FEATURE_CALL_DEPTH)
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+DECLARE_PER_CPU(u64, __x86_call_count);
+DECLARE_PER_CPU(u64, __x86_ret_count);
+DECLARE_PER_CPU(u64, __x86_stuffs_count);
+DECLARE_PER_CPU(u64, __x86_ctxsw_count);
+#endif
+#else
+static inline void x86_set_skl_return_thunk(void) {}
+
+#define CALL_DEPTH_ACCOUNT ""
+
+#endif
+
#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
@@ -215,6 +369,16 @@ extern void entry_ibpb(void);
#include <asm/GEN-for-each-reg.h>
#undef GEN
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
#ifdef CONFIG_X86_64
/*
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2a0b8dd4ec33..73e9522db7c1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -4,13 +4,13 @@
/* Various instructions on x86 need to be replaced for
* para-virtualization: those hooks are defined here. */
+#include <asm/paravirt_types.h>
+
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
#include <asm/nospec-branch.h>
-#include <asm/paravirt_types.h>
-
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/types.h>
@@ -665,6 +665,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
asm(".pushsection " section ", \"ax\";" \
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
+ ASM_FUNC_ALIGN \
PV_THUNK_NAME(func) ":" \
ASM_ENDBR \
FRAME_BEGIN \
@@ -730,6 +731,18 @@ static __always_inline unsigned long arch_local_irq_save(void)
#undef PVOP_VCALL4
#undef PVOP_CALL4
+#define DEFINE_PARAVIRT_ASM(func, instr, sec) \
+ asm (".pushsection " #sec ", \"ax\"\n" \
+ ".global " #func "\n\t" \
+ ".type " #func ", @function\n\t" \
+ ASM_FUNC_ALIGN "\n" \
+ #func ":\n\t" \
+ ASM_ENDBR \
+ instr "\n\t" \
+ ASM_RET \
+ ".size " #func ", . - " #func "\n\t" \
+ ".popsection")
+
extern void default_banner(void);
#else /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f72bf0f6ccf7..8c1da419260f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -3,6 +3,24 @@
#define _ASM_X86_PARAVIRT_TYPES_H
#ifndef __ASSEMBLY__
+/* These all sit in the .parainstructions section to tell us what to patch. */
+struct paravirt_patch_site {
+ u8 *instr; /* original instructions */
+ u8 type; /* type of this instruction */
+ u8 len; /* length of original instruction */
+};
+
+/* Lazy mode for batching updates / context switch */
+enum paravirt_lazy_mode {
+ PARAVIRT_LAZY_NONE,
+ PARAVIRT_LAZY_MMU,
+ PARAVIRT_LAZY_CPU,
+};
+#endif
+
+#ifdef CONFIG_PARAVIRT
+
+#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
#include <asm/pgtable_types.h>
@@ -534,13 +552,6 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
- PARAVIRT_LAZY_NONE,
- PARAVIRT_LAZY_MMU,
- PARAVIRT_LAZY_CPU,
-};
-
enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_start_context_switch(struct task_struct *prev);
void paravirt_end_context_switch(struct task_struct *next);
@@ -556,16 +567,9 @@ unsigned long paravirt_ret0(void);
#define paravirt_nop ((void *)_paravirt_nop)
-/* These all sit in the .parainstructions section to tell us what to patch. */
-struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 type; /* type of this instruction */
- u8 len; /* length of original instruction */
-};
-
extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];
#endif /* __ASSEMBLY__ */
-
+#endif /* CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 5f6daea1ee24..2d13f25b1bd8 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -4,11 +4,11 @@
#include <asm/rmwcc.h>
#include <asm/percpu.h>
+#include <asm/current.h>
+
#include <linux/thread_info.h>
#include <linux/static_call_types.h>
-DECLARE_PER_CPU(int, __preempt_count);
-
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
@@ -24,7 +24,7 @@ DECLARE_PER_CPU(int, __preempt_count);
*/
static __always_inline int preempt_count(void)
{
- return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
@@ -32,10 +32,10 @@ static __always_inline void preempt_count_set(int pc)
int old, new;
do {
- old = raw_cpu_read_4(__preempt_count);
+ old = raw_cpu_read_4(pcpu_hot.preempt_count);
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
- } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
+ } while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old);
}
/*
@@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
- per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
+ per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)
/*
@@ -58,17 +58,17 @@ static __always_inline void preempt_count_set(int pc)
static __always_inline void set_preempt_need_resched(void)
{
- raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+ raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED);
}
static __always_inline void clear_preempt_need_resched(void)
{
- raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+ raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED);
}
/*
@@ -77,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void)
static __always_inline void __preempt_count_add(int val)
{
- raw_cpu_add_4(__preempt_count, val);
+ raw_cpu_add_4(pcpu_hot.preempt_count, val);
}
static __always_inline void __preempt_count_sub(int val)
{
- raw_cpu_add_4(__preempt_count, -val);
+ raw_cpu_add_4(pcpu_hot.preempt_count, -val);
}
/*
@@ -92,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
+ return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
+ __percpu_arg([var]));
}
/*
@@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
*/
static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+ return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPTION
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6836c64b9819..4e35c66edeb7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -377,8 +377,6 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
-DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-
#ifdef CONFIG_X86_64
struct fixed_percpu_data {
/*
@@ -401,8 +399,6 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu)
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
}
-DECLARE_PER_CPU(void *, hardirq_stack_ptr);
-DECLARE_PER_CPU(bool, hardirq_stack_inuse);
extern asmlinkage void ignore_sysret(void);
/* Save actual FS/GS selectors and bases to current->thread */
@@ -411,8 +407,6 @@ void current_save_fsgs(void);
#ifdef CONFIG_STACKPROTECTOR
DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
#endif
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* !X86_64 */
struct perf_event;
@@ -517,7 +511,7 @@ static __always_inline unsigned long current_top_of_stack(void)
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
- return this_cpu_read_stable(cpu_current_top_of_stack);
+ return this_cpu_read_stable(pcpu_hot.top_of_stack);
}
static __always_inline bool on_thread_stack(void)
@@ -554,10 +548,9 @@ extern int sysenter_setup(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
-extern void switch_to_new_gdt(int);
+extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
-extern void load_percpu_segment(int);
extern void cpu_init(void);
extern void cpu_init_secondary(void);
extern void cpu_init_exception_handling(void);
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index dbb38a6b4dfb..42b17cf10b10 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -14,8 +14,6 @@
__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define __pv_queued_spin_unlock __pv_queued_spin_unlock
-#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
-#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
/*
* Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
@@ -37,32 +35,27 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
* rsi = lockval (second argument)
* rdx = internal variable (set to 0)
*/
-asm (".pushsection .spinlock.text, \"ax\";"
- ".globl " PV_UNLOCK ";"
- ".type " PV_UNLOCK ", @function;"
- ".align 4,0x90;"
- PV_UNLOCK ": "
- ASM_ENDBR
- FRAME_BEGIN
- "push %rdx;"
- "mov $0x1,%eax;"
- "xor %edx,%edx;"
- LOCK_PREFIX "cmpxchg %dl,(%rdi);"
- "cmp $0x1,%al;"
- "jne .slowpath;"
- "pop %rdx;"
+#define PV_UNLOCK_ASM \
+ FRAME_BEGIN \
+ "push %rdx\n\t" \
+ "mov $0x1,%eax\n\t" \
+ "xor %edx,%edx\n\t" \
+ LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
+ "cmp $0x1,%al\n\t" \
+ "jne .slowpath\n\t" \
+ "pop %rdx\n\t" \
+ FRAME_END \
+ ASM_RET \
+ ".slowpath:\n\t" \
+ "push %rsi\n\t" \
+ "movzbl %al,%esi\n\t" \
+ "call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t" \
+ "pop %rsi\n\t" \
+ "pop %rdx\n\t" \
FRAME_END
- ASM_RET
- ".slowpath: "
- "push %rsi;"
- "movzbl %al,%esi;"
- "call " PV_UNLOCK_SLOWPATH ";"
- "pop %rsi;"
- "pop %rdx;"
- FRAME_END
- ASM_RET
- ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
- ".popsection");
+
+DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock,
+ PV_UNLOCK_ASM, .spinlock.text);
#else /* CONFIG_64BIT */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a73bced40e24..b4dbb20dab1a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -3,10 +3,10 @@
#define _ASM_X86_SMP_H
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
-#include <asm/percpu.h>
-#include <asm/thread_info.h>
#include <asm/cpumask.h>
+#include <asm/current.h>
+#include <asm/thread_info.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
@@ -19,7 +19,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
@@ -150,11 +149,10 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
/*
* This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
+ * from the initial startup.
*/
-#define raw_smp_processor_id() this_cpu_read(cpu_number)
-#define __smp_processor_id() __this_cpu_read(cpu_number)
+#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number)
+#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number)
#ifdef CONFIG_X86_32
extern int safe_smp_processor_id(void);
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 1cc15528ce29..f4b87f08f5c5 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cceaafdd2d84..96d51bbc2bd4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -143,6 +143,8 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_CALL_THUNKS) += callthunks.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a9bea860e22a..23cbfa8d34c5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -116,6 +116,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern s32 __return_sites[], __return_sites_end[];
+extern s32 __cfi_sites[], __cfi_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
@@ -377,6 +378,56 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i;
}
+static inline bool is_jcc32(struct insn *insn)
+{
+ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
+ return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
+}
+
+static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ u8 op = insn->opcode.bytes[0];
+ int i = 0;
+
+ /*
+ * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
+ * tail-calls. Deal with them.
+ */
+ if (is_jcc32(insn)) {
+ bytes[i++] = op;
+ op = insn->opcode.bytes[1];
+ goto clang_jcc;
+ }
+
+ if (insn->length == 6)
+ bytes[i++] = 0x2e; /* CS-prefix */
+
+ switch (op) {
+ case CALL_INSN_OPCODE:
+ __text_gen_insn(bytes+i, op, addr+i,
+ __x86_indirect_call_thunk_array[reg],
+ CALL_INSN_SIZE);
+ i += CALL_INSN_SIZE;
+ break;
+
+ case JMP32_INSN_OPCODE:
+clang_jcc:
+ __text_gen_insn(bytes+i, op, addr+i,
+ __x86_indirect_jump_thunk_array[reg],
+ JMP32_INSN_SIZE);
+ i += JMP32_INSN_SIZE;
+ break;
+
+ default:
+ WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
+ return -1;
+ }
+
+ WARN_ON_ONCE(i != insn->length);
+
+ return i;
+}
+
/*
* Rewrite the compiler generated retpoline thunk calls.
*
@@ -409,8 +460,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4);
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
- !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
+ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
+ return emit_call_track_retpoline(addr, insn, reg, bytes);
+
return -1;
+ }
op = insn->opcode.bytes[0];
@@ -427,8 +482,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
* [ NOP ]
* 1:
*/
- /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
- if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
+ if (is_jcc32(insn)) {
cc = insn->opcode.bytes[1] & 0xf;
cc ^= 1; /* invert condition */
@@ -518,6 +572,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
#ifdef CONFIG_RETHUNK
+
+#ifdef CONFIG_CALL_THUNKS
+void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+#endif
+
/*
* Rewrite the compiler generated return thunk tail-calls.
*
@@ -533,14 +592,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
int i = 0;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- return -1;
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (x86_return_thunk == __x86_return_thunk)
+ return -1;
- bytes[i++] = RET_INSN_OPCODE;
+ i = JMP32_INSN_SIZE;
+ __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
+ } else {
+ bytes[i++] = RET_INSN_OPCODE;
+ }
for (; i < insn->length;)
bytes[i++] = INT3_INSN_OPCODE;
-
return i;
}
@@ -594,6 +657,28 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#ifdef CONFIG_X86_KERNEL_IBT
+static void poison_endbr(void *addr, bool warn)
+{
+ u32 endbr, poison = gen_endbr_poison();
+
+ if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
+ return;
+
+ if (!is_endbr(endbr)) {
+ WARN_ON_ONCE(warn);
+ return;
+ }
+
+ DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+
+ /*
+ * When we have IBT, the lack of ENDBR will trigger #CP
+ */
+ DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
+ DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
+ text_poke_early(addr, &poison, 4);
+}
+
/*
* Generated by: objtool --ibt
*/
@@ -602,31 +687,391 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
s32 *s;
for (s = start; s < end; s++) {
- u32 endbr, poison = gen_endbr_poison();
void *addr = (void *)s + *s;
- if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
+ poison_endbr(addr, true);
+ if (IS_ENABLED(CONFIG_FINEIBT))
+ poison_endbr(addr - 16, false);
+ }
+}
+
+#else
+
+void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
+#ifdef CONFIG_FINEIBT
+
+enum cfi_mode {
+ CFI_DEFAULT,
+ CFI_OFF,
+ CFI_KCFI,
+ CFI_FINEIBT,
+};
+
+static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
+static bool cfi_rand __ro_after_init = true;
+static u32 cfi_seed __ro_after_init;
+
+/*
+ * Re-hash the CFI hash with a boot-time seed while making sure the result is
+ * not a valid ENDBR instruction.
+ */
+static u32 cfi_rehash(u32 hash)
+{
+ hash ^= cfi_seed;
+ while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+ bool lsb = hash & 1;
+ hash >>= 1;
+ if (lsb)
+ hash ^= 0x80200003;
+ }
+ return hash;
+}
+
+static __init int cfi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ while (str) {
+ char *next = strchr(str, ',');
+ if (next) {
+ *next = 0;
+ next++;
+ }
+
+ if (!strcmp(str, "auto")) {
+ cfi_mode = CFI_DEFAULT;
+ } else if (!strcmp(str, "off")) {
+ cfi_mode = CFI_OFF;
+ cfi_rand = false;
+ } else if (!strcmp(str, "kcfi")) {
+ cfi_mode = CFI_KCFI;
+ } else if (!strcmp(str, "fineibt")) {
+ cfi_mode = CFI_FINEIBT;
+ } else if (!strcmp(str, "norand")) {
+ cfi_rand = false;
+ } else {
+ pr_err("Ignoring unknown cfi option (%s).", str);
+ }
+
+ str = next;
+ }
+
+ return 0;
+}
+early_param("cfi", cfi_parse_cmdline);
+
+/*
+ * kCFI FineIBT
+ *
+ * __cfi_\func: __cfi_\func:
+ * movl $0x12345678,%eax // 5 endbr64 // 4
+ * nop subl $0x12345678,%r10d // 7
+ * nop jz 1f // 2
+ * nop ud2 // 2
+ * nop 1: nop // 1
+ * nop
+ * nop
+ * nop
+ * nop
+ * nop
+ * nop
+ * nop
+ *
+ *
+ * caller: caller:
+ * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6
+ * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4
+ * je 1f // 2 nop4 // 4
+ * ud2 // 2
+ * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5
+ *
+ */
+
+asm( ".pushsection .rodata \n"
+ "fineibt_preamble_start: \n"
+ " endbr64 \n"
+ " subl $0x12345678, %r10d \n"
+ " je fineibt_preamble_end \n"
+ " ud2 \n"
+ " nop \n"
+ "fineibt_preamble_end: \n"
+ ".popsection\n"
+);
+
+extern u8 fineibt_preamble_start[];
+extern u8 fineibt_preamble_end[];
+
+#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
+#define fineibt_preamble_hash 7
+
+asm( ".pushsection .rodata \n"
+ "fineibt_caller_start: \n"
+ " movl $0x12345678, %r10d \n"
+ " sub $16, %r11 \n"
+ ASM_NOP4
+ "fineibt_caller_end: \n"
+ ".popsection \n"
+);
+
+extern u8 fineibt_caller_start[];
+extern u8 fineibt_caller_end[];
+
+#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
+#define fineibt_caller_hash 2
+
+#define fineibt_caller_jmp (fineibt_caller_size - 2)
+
+static u32 decode_preamble_hash(void *addr)
+{
+ u8 *p = addr;
+
+ /* b8 78 56 34 12 mov $0x12345678,%eax */
+ if (p[0] == 0xb8)
+ return *(u32 *)(addr + 1);
+
+ return 0; /* invalid hash value */
+}
+
+static u32 decode_caller_hash(void *addr)
+{
+ u8 *p = addr;
+
+ /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */
+ if (p[0] == 0x41 && p[1] == 0xba)
+ return -*(u32 *)(addr + 2);
+
+ /* e8 0c 78 56 34 12 jmp.d8 +12 */
+ if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
+ return -*(u32 *)(addr + 2);
+
+ return 0; /* invalid hash value */
+}
+
+/* .retpoline_sites */
+static int cfi_disable_callers(s32 *start, s32 *end)
+{
+ /*
+ * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
+ * in tact for later usage. Also see decode_caller_hash() and
+ * cfi_rewrite_callers().
+ */
+ const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ u32 hash;
+
+ addr -= fineibt_caller_size;
+ hash = decode_caller_hash(addr);
+ if (!hash) /* nocfi callers */
continue;
- if (WARN_ON_ONCE(!is_endbr(endbr)))
+ text_poke_early(addr, jmp, 2);
+ }
+
+ return 0;
+}
+
+static int cfi_enable_callers(s32 *start, s32 *end)
+{
+ /*
+ * Re-enable kCFI, undo what cfi_disable_callers() did.
+ */
+ const u8 mov[] = { 0x41, 0xba };
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ u32 hash;
+
+ addr -= fineibt_caller_size;
+ hash = decode_caller_hash(addr);
+ if (!hash) /* nocfi callers */
continue;
- DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+ text_poke_early(addr, mov, 2);
+ }
- /*
- * When we have IBT, the lack of ENDBR will trigger #CP
- */
- DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
- DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
- text_poke_early(addr, &poison, 4);
+ return 0;
+}
+
+/* .cfi_sites */
+static int cfi_rand_preamble(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ u32 hash;
+
+ hash = decode_preamble_hash(addr);
+ if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
+ addr, addr, 5, addr))
+ return -EINVAL;
+
+ hash = cfi_rehash(hash);
+ text_poke_early(addr + 1, &hash, 4);
+ }
+
+ return 0;
+}
+
+static int cfi_rewrite_preamble(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ u32 hash;
+
+ hash = decode_preamble_hash(addr);
+ if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
+ addr, addr, 5, addr))
+ return -EINVAL;
+
+ text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
+ WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
+ text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
+ }
+
+ return 0;
+}
+
+/* .retpoline_sites */
+static int cfi_rand_callers(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ u32 hash;
+
+ addr -= fineibt_caller_size;
+ hash = decode_caller_hash(addr);
+ if (hash) {
+ hash = -cfi_rehash(hash);
+ text_poke_early(addr + 2, &hash, 4);
+ }
+ }
+
+ return 0;
+}
+
+static int cfi_rewrite_callers(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ u32 hash;
+
+ addr -= fineibt_caller_size;
+ hash = decode_caller_hash(addr);
+ if (hash) {
+ text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
+ WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
+ text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+ }
+ /* rely on apply_retpolines() */
+ }
+
+ return 0;
+}
+
+static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+ s32 *start_cfi, s32 *end_cfi, bool builtin)
+{
+ int ret;
+
+ if (WARN_ONCE(fineibt_preamble_size != 16,
+ "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
+ return;
+
+ if (cfi_mode == CFI_DEFAULT) {
+ cfi_mode = CFI_KCFI;
+ if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
+ cfi_mode = CFI_FINEIBT;
+ }
+
+ /*
+ * Rewrite the callers to not use the __cfi_ stubs, such that we might
+ * rewrite them. This disables all CFI. If this succeeds but any of the
+ * later stages fails, we're without CFI.
+ */
+ ret = cfi_disable_callers(start_retpoline, end_retpoline);
+ if (ret)
+ goto err;
+
+ if (cfi_rand) {
+ if (builtin)
+ cfi_seed = get_random_u32();
+
+ ret = cfi_rand_preamble(start_cfi, end_cfi);
+ if (ret)
+ goto err;
+
+ ret = cfi_rand_callers(start_retpoline, end_retpoline);
+ if (ret)
+ goto err;
+ }
+
+ switch (cfi_mode) {
+ case CFI_OFF:
+ if (builtin)
+ pr_info("Disabling CFI\n");
+ return;
+
+ case CFI_KCFI:
+ ret = cfi_enable_callers(start_retpoline, end_retpoline);
+ if (ret)
+ goto err;
+
+ if (builtin)
+ pr_info("Using kCFI\n");
+ return;
+
+ case CFI_FINEIBT:
+ ret = cfi_rewrite_preamble(start_cfi, end_cfi);
+ if (ret)
+ goto err;
+
+ ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
+ if (ret)
+ goto err;
+
+ if (builtin)
+ pr_info("Using FineIBT CFI\n");
+ return;
+
+ default:
+ break;
}
+
+err:
+ pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
}
#else
-void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }
+static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+ s32 *start_cfi, s32 *end_cfi, bool builtin)
+{
+}
-#endif /* CONFIG_X86_KERNEL_IBT */
+#endif
+
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+ s32 *start_cfi, s32 *end_cfi)
+{
+ return __apply_fineibt(start_retpoline, end_retpoline,
+ start_cfi, end_cfi,
+ /* .builtin = */ false);
+}
#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
@@ -934,6 +1379,9 @@ void __init alternative_instructions(void)
*/
apply_paravirt(__parainstructions, __parainstructions_end);
+ __apply_fineibt(__retpoline_sites, __retpoline_sites_end,
+ __cfi_sites, __cfi_sites_end, true);
+
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
@@ -947,6 +1395,12 @@ void __init alternative_instructions(void)
*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
+ /*
+ * Now all calls are established. Apply the call thunks if
+ * required.
+ */
+ callthunks_patch_builtin_calls();
+
apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
#ifdef CONFIG_SMP
@@ -1236,27 +1690,15 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
return __text_poke(text_poke_memcpy, addr, opcode, len);
}
-/**
- * text_poke_copy - Copy instructions into (an unused part of) RX memory
- * @addr: address to modify
- * @opcode: source of the copy
- * @len: length to copy, could be more than 2x PAGE_SIZE
- *
- * Not safe against concurrent execution; useful for JITs to dump
- * new code blocks into unused regions of RX memory. Can be used in
- * conjunction with synchronize_rcu_tasks() to wait for existing
- * execution to quiesce after having made sure no existing functions
- * pointers are live.
- */
-void *text_poke_copy(void *addr, const void *opcode, size_t len)
+void *text_poke_copy_locked(void *addr, const void *opcode, size_t len,
+ bool core_ok)
{
unsigned long start = (unsigned long)addr;
size_t patched = 0;
- if (WARN_ON_ONCE(core_kernel_text(start)))
+ if (WARN_ON_ONCE(!core_ok && core_kernel_text(start)))
return NULL;
- mutex_lock(&text_mutex);
while (patched < len) {
unsigned long ptr = start + patched;
size_t s;
@@ -1266,6 +1708,25 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
patched += s;
}
+ return addr;
+}
+
+/**
+ * text_poke_copy - Copy instructions into (an unused part of) RX memory
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy, could be more than 2x PAGE_SIZE
+ *
+ * Not safe against concurrent execution; useful for JITs to dump
+ * new code blocks into unused regions of RX memory. Can be used in
+ * conjunction with synchronize_rcu_tasks() to wait for existing
+ * execution to quiesce after having made sure no existing functions
+ * pointers are live.
+ */
+void *text_poke_copy(void *addr, const void *opcode, size_t len)
+{
+ mutex_lock(&text_mutex);
+ addr = text_poke_copy_locked(addr, opcode, len, false);
mutex_unlock(&text_mutex);
return addr;
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 437308004ef2..82c783da16a8 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -107,4 +107,9 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+ OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+ OFFSET(X86_call_depth, pcpu_hot, call_depth);
+#endif
+
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 9b698215d261..bb65371ea9df 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -57,7 +57,7 @@ int main(void)
BLANK();
#ifdef CONFIG_STACKPROTECTOR
- DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
+ OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
BLANK();
#endif
return 0;
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
new file mode 100644
index 000000000000..7d2c75ec9a8c
--- /dev/null
+++ b/arch/x86/kernel/callthunks.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "callthunks: " fmt
+
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/memory.h>
+#include <linux/moduleloader.h>
+#include <linux/static_call.h>
+
+#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpu.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+#include <asm/kexec.h>
+#include <asm/nospec-branch.h>
+#include <asm/paravirt.h>
+#include <asm/sections.h>
+#include <asm/switch_to.h>
+#include <asm/sync_core.h>
+#include <asm/text-patching.h>
+#include <asm/xen/hypercall.h>
+
+static int __initdata_or_module debug_callthunks;
+
+#define prdbg(fmt, args...) \
+do { \
+ if (debug_callthunks) \
+ printk(KERN_DEBUG pr_fmt(fmt), ##args); \
+} while(0)
+
+static int __init debug_thunks(char *str)
+{
+ debug_callthunks = 1;
+ return 1;
+}
+__setup("debug-callthunks", debug_thunks);
+
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+DEFINE_PER_CPU(u64, __x86_call_count);
+DEFINE_PER_CPU(u64, __x86_ret_count);
+DEFINE_PER_CPU(u64, __x86_stuffs_count);
+DEFINE_PER_CPU(u64, __x86_ctxsw_count);
+EXPORT_SYMBOL_GPL(__x86_ctxsw_count);
+EXPORT_SYMBOL_GPL(__x86_call_count);
+#endif
+
+extern s32 __call_sites[], __call_sites_end[];
+
+struct thunk_desc {
+ void *template;
+ unsigned int template_size;
+};
+
+struct core_text {
+ unsigned long base;
+ unsigned long end;
+ const char *name;
+};
+
+static bool thunks_initialized __ro_after_init;
+
+static const struct core_text builtin_coretext = {
+ .base = (unsigned long)_text,
+ .end = (unsigned long)_etext,
+ .name = "builtin",
+};
+
+asm (
+ ".pushsection .rodata \n"
+ ".global skl_call_thunk_template \n"
+ "skl_call_thunk_template: \n"
+ __stringify(INCREMENT_CALL_DEPTH)" \n"
+ ".global skl_call_thunk_tail \n"
+ "skl_call_thunk_tail: \n"
+ ".popsection \n"
+);
+
+extern u8 skl_call_thunk_template[];
+extern u8 skl_call_thunk_tail[];
+
+#define SKL_TMPL_SIZE \
+ ((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template))
+
+extern void error_entry(void);
+extern void xen_error_entry(void);
+extern void paranoid_entry(void);
+
+static inline bool within_coretext(const struct core_text *ct, void *addr)
+{
+ unsigned long p = (unsigned long)addr;
+
+ return ct->base <= p && p < ct->end;
+}
+
+static inline bool within_module_coretext(void *addr)
+{
+ bool ret = false;
+
+#ifdef CONFIG_MODULES
+ struct module *mod;
+
+ preempt_disable();
+ mod = __module_address((unsigned long)addr);
+ if (mod && within_module_core((unsigned long)addr, mod))
+ ret = true;
+ preempt_enable();
+#endif
+ return ret;
+}
+
+static bool is_coretext(const struct core_text *ct, void *addr)
+{
+ if (ct && within_coretext(ct, addr))
+ return true;
+ if (within_coretext(&builtin_coretext, addr))
+ return true;
+ return within_module_coretext(addr);
+}
+
+static __init_or_module bool skip_addr(void *dest)
+{
+ if (dest == error_entry)
+ return true;
+ if (dest == paranoid_entry)
+ return true;
+ if (dest == xen_error_entry)
+ return true;
+ /* Does FILL_RSB... */
+ if (dest == __switch_to_asm)
+ return true;
+ /* Accounts directly */
+ if (dest == ret_from_fork)
+ return true;
+#ifdef CONFIG_HOTPLUG_CPU
+ if (dest == start_cpu0)
+ return true;
+#endif
+#ifdef CONFIG_FUNCTION_TRACER
+ if (dest == __fentry__)
+ return true;
+#endif
+#ifdef CONFIG_KEXEC_CORE
+ if (dest >= (void *)relocate_kernel &&
+ dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
+ return true;
+#endif
+#ifdef CONFIG_XEN
+ if (dest >= (void *)hypercall_page &&
+ dest < (void*)hypercall_page + PAGE_SIZE)
+ return true;
+#endif
+ return false;
+}
+
+static __init_or_module void *call_get_dest(void *addr)
+{
+ struct insn insn;
+ void *dest;
+ int ret;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ /* Patched out call? */
+ if (insn.opcode.bytes[0] != CALL_INSN_OPCODE)
+ return NULL;
+
+ dest = addr + insn.length + insn.immediate.value;
+ if (skip_addr(dest))
+ return NULL;
+ return dest;
+}
+
+static const u8 nops[] = {
+ 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+ 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+ 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+ 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+};
+
+static __init_or_module void *patch_dest(void *dest, bool direct)
+{
+ unsigned int tsize = SKL_TMPL_SIZE;
+ u8 *pad = dest - tsize;
+
+ /* Already patched? */
+ if (!bcmp(pad, skl_call_thunk_template, tsize))
+ return pad;
+
+ /* Ensure there are nops */
+ if (bcmp(pad, nops, tsize)) {
+ pr_warn_once("Invalid padding area for %pS\n", dest);
+ return NULL;
+ }
+
+ if (direct)
+ memcpy(pad, skl_call_thunk_template, tsize);
+ else
+ text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
+ return pad;
+}
+
+static __init_or_module void patch_call(void *addr, const struct core_text *ct)
+{
+ void *pad, *dest;
+ u8 bytes[8];
+
+ if (!within_coretext(ct, addr))
+ return;
+
+ dest = call_get_dest(addr);
+ if (!dest || WARN_ON_ONCE(IS_ERR(dest)))
+ return;
+
+ if (!is_coretext(ct, dest))
+ return;
+
+ pad = patch_dest(dest, within_coretext(ct, dest));
+ if (!pad)
+ return;
+
+ prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr,
+ dest, dest, pad);
+ __text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE);
+ text_poke_early(addr, bytes, CALL_INSN_SIZE);
+}
+
+static __init_or_module void
+patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++)
+ patch_call((void *)s + *s, ct);
+}
+
+static __init_or_module void
+patch_paravirt_call_sites(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end,
+ const struct core_text *ct)
+{
+ struct paravirt_patch_site *p;
+
+ for (p = start; p < end; p++)
+ patch_call(p->instr, ct);
+}
+
+static __init_or_module void
+callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
+{
+ prdbg("Patching call sites %s\n", ct->name);
+ patch_call_sites(cs->call_start, cs->call_end, ct);
+ patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct);
+ prdbg("Patching call sites done%s\n", ct->name);
+}
+
+void __init callthunks_patch_builtin_calls(void)
+{
+ struct callthunk_sites cs = {
+ .call_start = __call_sites,
+ .call_end = __call_sites_end,
+ .pv_start = __parainstructions,
+ .pv_end = __parainstructions_end
+ };
+
+ if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
+ return;
+
+ pr_info("Setting up call depth tracking\n");
+ mutex_lock(&text_mutex);
+ callthunks_setup(&cs, &builtin_coretext);
+ static_call_force_reinit();
+ thunks_initialized = true;
+ mutex_unlock(&text_mutex);
+}
+
+void *callthunks_translate_call_dest(void *dest)
+{
+ void *target;
+
+ lockdep_assert_held(&text_mutex);
+
+ if (!thunks_initialized || skip_addr(dest))
+ return dest;
+
+ if (!is_coretext(NULL, dest))
+ return dest;
+
+ target = patch_dest(dest, false);
+ return target ? : dest;
+}
+
+bool is_callthunk(void *addr)
+{
+ unsigned int tmpl_size = SKL_TMPL_SIZE;
+ void *tmpl = skl_call_thunk_template;
+ unsigned long dest;
+
+ dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
+ if (!thunks_initialized || skip_addr((void *)dest))
+ return false;
+
+ return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+}
+
+#ifdef CONFIG_BPF_JIT
+int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+{
+ unsigned int tmpl_size = SKL_TMPL_SIZE;
+ void *tmpl = skl_call_thunk_template;
+
+ if (!thunks_initialized)
+ return 0;
+
+ /* Is function call target a thunk? */
+ if (func && is_callthunk(func))
+ return 0;
+
+ memcpy(*pprog, tmpl, tmpl_size);
+ *pprog += tmpl_size;
+ return tmpl_size;
+}
+#endif
+
+#ifdef CONFIG_MODULES
+void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
+ struct module *mod)
+{
+ struct core_text ct = {
+ .base = (unsigned long)mod->core_layout.base,
+ .end = (unsigned long)mod->core_layout.base + mod->core_layout.size,
+ .name = mod->name,
+ };
+
+ if (!thunks_initialized)
+ return;
+
+ mutex_lock(&text_mutex);
+ callthunks_setup(cs, &ct);
+ mutex_unlock(&text_mutex);
+}
+#endif /* CONFIG_MODULES */
+
+#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
+static int callthunks_debug_show(struct seq_file *m, void *p)
+{
+ unsigned long cpu = (unsigned long)m->private;
+
+ seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,",
+ per_cpu(__x86_call_count, cpu),
+ per_cpu(__x86_ret_count, cpu),
+ per_cpu(__x86_stuffs_count, cpu),
+ per_cpu(__x86_ctxsw_count, cpu));
+ return 0;
+}
+
+static int callthunks_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, callthunks_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_ops = {
+ .open = callthunks_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init callthunks_debugfs_init(void)
+{
+ struct dentry *dir;
+ unsigned long cpu;
+
+ dir = debugfs_create_dir("callthunks", NULL);
+ for_each_possible_cpu(cpu) {
+ void *arg = (void *)cpu;
+ char name [10];
+
+ sprintf(name, "cpu%lu", cpu);
+ debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
+ }
+ return 0;
+}
+__initcall(callthunks_debugfs_init);
+#endif
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index f10a921ee756..d7e3ceaf75c1 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,9 +17,6 @@ KMSAN_SANITIZE_common.o := n
# As above, instrumenting secondary CPU boot code causes boot hangs.
KCSAN_SANITIZE_common.o := n
-# Make sure load_percpu_segment has no stackprotector
-CFLAGS_common.o := -fno-stack-protector
-
obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a8a5033a804d..d970ddb0cc65 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -787,6 +787,7 @@ enum retbleed_mitigation {
RETBLEED_MITIGATION_IBPB,
RETBLEED_MITIGATION_IBRS,
RETBLEED_MITIGATION_EIBRS,
+ RETBLEED_MITIGATION_STUFF,
};
enum retbleed_mitigation_cmd {
@@ -794,6 +795,7 @@ enum retbleed_mitigation_cmd {
RETBLEED_CMD_AUTO,
RETBLEED_CMD_UNRET,
RETBLEED_CMD_IBPB,
+ RETBLEED_CMD_STUFF,
};
static const char * const retbleed_strings[] = {
@@ -802,6 +804,7 @@ static const char * const retbleed_strings[] = {
[RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
[RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
[RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+ [RETBLEED_MITIGATION_STUFF] = "Mitigation: Stuffing",
};
static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
@@ -831,8 +834,12 @@ static int __init retbleed_parse_cmdline(char *str)
retbleed_cmd = RETBLEED_CMD_UNRET;
} else if (!strcmp(str, "ibpb")) {
retbleed_cmd = RETBLEED_CMD_IBPB;
+ } else if (!strcmp(str, "stuff")) {
+ retbleed_cmd = RETBLEED_CMD_STUFF;
} else if (!strcmp(str, "nosmt")) {
retbleed_nosmt = true;
+ } else if (!strcmp(str, "force")) {
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
} else {
pr_err("Ignoring unknown retbleed option (%s).", str);
}
@@ -879,6 +886,21 @@ static void __init retbleed_select_mitigation(void)
}
break;
+ case RETBLEED_CMD_STUFF:
+ if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) &&
+ spectre_v2_enabled == SPECTRE_V2_RETPOLINE) {
+ retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
+
+ } else {
+ if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING))
+ pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n");
+ else
+ pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n");
+
+ goto do_cmd_auto;
+ }
+ break;
+
do_cmd_auto:
case RETBLEED_CMD_AUTO:
default:
@@ -916,6 +938,12 @@ do_cmd_auto:
mitigate_smt = true;
break;
+ case RETBLEED_MITIGATION_STUFF:
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
+ x86_set_skl_return_thunk();
+ break;
+
default:
break;
}
@@ -926,7 +954,7 @@ do_cmd_auto:
/*
* Let IBRS trump all on Intel without affecting the effects of the
- * retbleed= cmdline option.
+ * retbleed= cmdline option except for call depth based stuffing
*/
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
switch (spectre_v2_enabled) {
@@ -939,7 +967,8 @@ do_cmd_auto:
retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
break;
default:
- pr_err(RETBLEED_INTEL_MSG);
+ if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+ pr_err(RETBLEED_INTEL_MSG);
}
}
@@ -1413,6 +1442,7 @@ static void __init spectre_v2_select_mitigation(void)
if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
boot_cpu_has_bug(X86_BUG_RETBLEED) &&
retbleed_cmd != RETBLEED_CMD_OFF &&
+ retbleed_cmd != RETBLEED_CMD_STUFF &&
boot_cpu_has(X86_FEATURE_IBRS) &&
boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
mode = SPECTRE_V2_IBRS;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 62b83bc5b4b9..9cfca3d7d0e2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -610,6 +610,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
if (!ibt_selftest()) {
pr_err("IBT selftest: Failed!\n");
+ wrmsrl(MSR_IA32_S_CET, 0);
setup_clear_cpu_cap(X86_FEATURE_IBT);
return;
}
@@ -702,16 +703,6 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
-void load_percpu_segment(int cpu)
-{
-#ifdef CONFIG_X86_32
- loadsegment(fs, __KERNEL_PERCPU);
-#else
- __loadsegment_simple(gs, 0);
- wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
-#endif
-}
-
#ifdef CONFIG_X86_32
/* The 32-bit entry code needs to find cpu_entry_area. */
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
@@ -739,16 +730,45 @@ void load_fixmap_gdt(int cpu)
}
EXPORT_SYMBOL_GPL(load_fixmap_gdt);
-/*
- * Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one.
+/**
+ * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base
+ * @cpu: The CPU number for which this is invoked
+ *
+ * Invoked during early boot to switch from early GDT and early per CPU to
+ * the direct GDT and the runtime per CPU area. On 32-bit the percpu base
+ * switch is implicit by loading the direct GDT. On 64bit this requires
+ * to update GSBASE.
*/
-void switch_to_new_gdt(int cpu)
+void __init switch_gdt_and_percpu_base(int cpu)
{
- /* Load the original GDT */
load_direct_gdt(cpu);
- /* Reload the per-cpu base */
- load_percpu_segment(cpu);
+
+#ifdef CONFIG_X86_64
+ /*
+ * No need to load %gs. It is already correct.
+ *
+ * Writing %gs on 64bit would zero GSBASE which would make any per
+ * CPU operation up to the point of the wrmsrl() fault.
+ *
+ * Set GSBASE to the new offset. Until the wrmsrl() happens the
+ * early mapping is still valid. That means the GSBASE update will
+ * lose any prior per CPU data which was not copied over in
+ * setup_per_cpu_areas().
+ *
+ * This works even with stackprotector enabled because the
+ * per CPU stack canary is 0 in both per CPU areas.
+ */
+ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
+#else
+ /*
+ * %fs is already set to __KERNEL_PERCPU, but after switching GDT
+ * it is required to load FS again so that the 'hidden' part is
+ * updated from the new GDT. Up to this point the early per CPU
+ * translation is active. Any content of the early per CPU data
+ * which was not copied over in setup_per_cpu_areas() is lost.
+ */
+ loadsegment(fs, __KERNEL_PERCPU);
+#endif
}
static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -1993,27 +2013,18 @@ static __init int setup_clearcpuid(char *arg)
}
__setup("clearcpuid=", setup_clearcpuid);
+DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
+ .current_task = &init_task,
+ .preempt_count = INIT_PREEMPT_COUNT,
+ .top_of_stack = TOP_OF_INIT_STACK,
+};
+EXPORT_PER_CPU_SYMBOL(pcpu_hot);
+
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-/*
- * The following percpu variables are hot. Align current_task to
- * cacheline size such that they fall in the same cacheline.
- */
-DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
- &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
-DEFINE_PER_CPU(void *, hardirq_stack_ptr);
-DEFINE_PER_CPU(bool, hardirq_stack_inuse);
-
-DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
-EXPORT_PER_CPU_SYMBOL(__preempt_count);
-
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
-
static void wrmsrl_cstar(unsigned long val)
{
/*
@@ -2064,20 +2075,6 @@ void syscall_init(void)
#else /* CONFIG_X86_64 */
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
-EXPORT_PER_CPU_SYMBOL(__preempt_count);
-
-/*
- * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
- * the top of the kernel stack. Use an extra percpu variable to track the
- * top of the kernel stack directly.
- */
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
- (unsigned long)&init_thread_union + THREAD_SIZE;
-EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
-
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
@@ -2248,12 +2245,6 @@ void cpu_init(void)
boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
- switch_to_new_gdt(cpu);
-
if (IS_ENABLED(CONFIG_X86_64)) {
loadsegment(fs, 0);
memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 722fd712e1cf..b4905d5173fd 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
+ unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
@@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
+ unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6c5defd6569a..f05339fee778 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
+ unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
unsigned long *begin;
/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index bd165004776d..cf15ef5aecff 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void)
static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
+ /*
+ * No need to translate into a callthunk. The trampoline does
+ * the depth accounting itself.
+ */
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}
@@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long size;
unsigned long *ptr;
void *trampoline;
- void *ip;
+ void *ip, *dest;
/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
@@ -359,7 +363,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + size;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
+ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
memcpy(ip, retq, sizeof(retq));
@@ -404,17 +408,19 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* put in the call to the function */
mutex_lock(&text_mutex);
call_offset -= start_offset;
+ /*
+ * No need to translate into a callthunk. The trampoline does
+ * the depth accounting before the call already.
+ */
+ dest = ftrace_ops_get_func(ops);
memcpy(trampoline + call_offset,
- text_gen_insn(CALL_INSN_OPCODE,
- trampoline + call_offset,
- ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
+ text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
+ CALL_INSN_SIZE);
mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
- set_vm_flush_reset_perms(trampoline);
-
if (likely(system_state != SYSTEM_BOOTING))
set_memory_ro((unsigned long)trampoline, npages);
set_memory_x((unsigned long)trampoline, npages);
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 2a4be92fd144..1265ad519249 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -3,8 +3,9 @@
* Copyright (C) 2014 Steven Rostedt, Red Hat Inc
*/
-#include <linux/linkage.h>
#include <linux/cfi_types.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
@@ -131,16 +132,19 @@
.endm
SYM_TYPED_FUNC_START(ftrace_stub)
+ CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub)
SYM_TYPED_FUNC_START(ftrace_stub_graph)
+ CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub_graph)
#ifdef CONFIG_DYNAMIC_FTRACE
SYM_FUNC_START(__fentry__)
+ CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
@@ -149,6 +153,8 @@ SYM_FUNC_START(ftrace_caller)
/* save_mcount_regs fills in first two parameters */
save_mcount_regs
+ CALL_DEPTH_ACCOUNT
+
/* Stack - skipping return address of ftrace_caller */
leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
movq %rcx, RSP(%rsp)
@@ -164,6 +170,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
/* Only ops with REGS flag set should have CS register set */
movq $0, CS(%rsp)
+ /* Account for the function call below */
+ CALL_DEPTH_ACCOUNT
+
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
call ftrace_stub
@@ -193,6 +202,8 @@ SYM_FUNC_START(ftrace_regs_caller)
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
+ CALL_DEPTH_ACCOUNT
+
SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
/* Load the ftrace_ops into the 3rd parameter */
@@ -223,6 +234,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
/* regs go into 4th parameter */
leaq (%rsp), %rcx
+ /* Account for the function call below */
+ CALL_DEPTH_ACCOUNT
+
SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
call ftrace_stub
@@ -275,7 +289,20 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
/* Restore flags */
popfq
UNWIND_HINT_FUNC
- RET
+
+ /*
+ * The above left an extra return value on the stack; effectively
+ * doing a tail-call without using a register. This PUSH;RET
+ * pattern unbalances the RSB, inject a pointless CALL to rebalance.
+ */
+ ANNOTATE_INTRA_FUNCTION_CALL
+ CALL .Ldo_rebalance
+ int3
+.Ldo_rebalance:
+ add $8, %rsp
+ ALTERNATIVE __stringify(RET), \
+ __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
+ X86_FEATURE_CALL_DEPTH
SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
@@ -284,6 +311,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
SYM_FUNC_START(__fentry__)
+ CALL_DEPTH_ACCOUNT
+
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
RET
@@ -337,6 +366,8 @@ SYM_CODE_START(return_to_handler)
int3
.Ldo_rop:
mov %rdi, (%rsp)
- RET
+ ALTERNATIVE __stringify(RET), \
+ __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
+ X86_FEATURE_CALL_DEPTH
SYM_CODE_END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d860d437631b..222efd4a09bc 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -370,6 +370,7 @@ SYM_CODE_END(secondary_startup_64)
* start_secondary() via .Ljump_to_C_code.
*/
SYM_CODE_START(start_cpu0)
+ ANNOTATE_NOENDBR
UNWIND_HINT_EMPTY
movq initial_stack(%rip), %rsp
jmp .Ljump_to_C_code
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 01833ebf5e8e..dc1049c01f9b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -52,9 +52,6 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { }
#endif
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
-
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
@@ -77,7 +74,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
u32 *isp, *prev_esp, arg1;
curstk = (struct irq_stack *) current_stack();
- irqstk = __this_cpu_read(hardirq_stack_ptr);
+ irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -115,7 +112,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
int node = cpu_to_node(cpu);
struct page *ph, *ps;
- if (per_cpu(hardirq_stack_ptr, cpu))
+ if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
return 0;
ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
@@ -127,8 +124,8 @@ int irq_init_percpu_irqstack(unsigned int cpu)
return -ENOMEM;
}
- per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
- per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
+ per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph);
+ per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps);
return 0;
}
@@ -138,7 +135,7 @@ void do_softirq_own_stack(void)
struct irq_stack *irqstk;
u32 *isp, *prev_esp;
- irqstk = __this_cpu_read(softirq_stack_ptr);
+ irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr);
/* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1c0fb96b9e39..fe0c859873d1 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -50,7 +50,7 @@ static int map_irq_stack(unsigned int cpu)
return -ENOMEM;
/* Store actual TOS to avoid adjustment in the hotpath */
- per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
+ per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0;
}
#else
@@ -63,14 +63,14 @@ static int map_irq_stack(unsigned int cpu)
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
/* Store actual TOS to avoid adjustment in the hotpath */
- per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
+ per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
return 0;
}
#endif
int irq_init_percpu_irqstack(unsigned int cpu)
{
- if (per_cpu(hardirq_stack_ptr, cpu))
+ if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
return 0;
return map_irq_stack(cpu);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index eb8bc82846b9..01b8d956aa76 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -414,7 +414,6 @@ void *alloc_insn_page(void)
if (!page)
return NULL;
- set_vm_flush_reset_perms(page);
/*
* First make the page read-only, and only then make it executable to
* prevent it from being W+X in between.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d4e48b4a438b..4d053cb2c48a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -798,19 +798,13 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
* Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
* restoring to/from the stack.
*/
-asm(
-".pushsection .text;"
-".global __raw_callee_save___kvm_vcpu_is_preempted;"
-".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
-"__raw_callee_save___kvm_vcpu_is_preempted:"
-ASM_ENDBR
-"movq __per_cpu_offset(,%rdi,8), %rax;"
-"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
-"setne %al;"
-ASM_RET
-".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
-".popsection");
+#define PV_VCPU_PREEMPTED_ASM \
+ "movq __per_cpu_offset(,%rdi,8), %rax\n\t" \
+ "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
+ "setne %al\n\t"
+DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
+ PV_VCPU_PREEMPTED_ASM, .text);
#endif
static void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d85a6980e263..705fb2a41d7d 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -74,10 +74,11 @@ void *module_alloc(unsigned long size)
return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask,
- PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
- __builtin_return_address(0));
+ MODULES_VADDR + get_module_load_offset(),
+ MODULES_END, gfp_mask, PAGE_KERNEL,
+ VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
+ NUMA_NO_NODE, __builtin_return_address(0));
+
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
@@ -253,7 +254,8 @@ int module_finalize(const Elf_Ehdr *hdr,
{
const Elf_Shdr *s, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
- *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
+ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
+ *calls = NULL, *cfi = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -271,6 +273,10 @@ int module_finalize(const Elf_Ehdr *hdr,
retpolines = s;
if (!strcmp(".return_sites", secstrings + s->sh_name))
returns = s;
+ if (!strcmp(".call_sites", secstrings + s->sh_name))
+ calls = s;
+ if (!strcmp(".cfi_sites", secstrings + s->sh_name))
+ cfi = s;
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
ibt_endbr = s;
}
@@ -283,6 +289,22 @@ int module_finalize(const Elf_Ehdr *hdr,
void *pseg = (void *)para->sh_addr;
apply_paravirt(pseg, pseg + para->sh_size);
}
+ if (retpolines || cfi) {
+ void *rseg = NULL, *cseg = NULL;
+ unsigned int rsize = 0, csize = 0;
+
+ if (retpolines) {
+ rseg = (void *)retpolines->sh_addr;
+ rsize = retpolines->sh_size;
+ }
+
+ if (cfi) {
+ cseg = (void *)cfi->sh_addr;
+ csize = cfi->sh_size;
+ }
+
+ apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
+ }
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
@@ -296,6 +318,21 @@ int module_finalize(const Elf_Ehdr *hdr,
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size);
}
+ if (calls || para) {
+ struct callthunk_sites cs = {};
+
+ if (calls) {
+ cs.call_start = (void *)calls->sh_addr;
+ cs.call_end = (void *)calls->sh_addr + calls->sh_size;
+ }
+
+ if (para) {
+ cs.pv_start = (void *)para->sh_addr;
+ cs.pv_end = (void *)para->sh_addr + para->sh_size;
+ }
+
+ callthunks_patch_module_calls(&cs, me);
+ }
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7ca2d46c08cc..327757afb027 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -37,27 +37,10 @@
* nop stub, which must not clobber anything *including the stack* to
* avoid confusing the entry prologues.
*/
-extern void _paravirt_nop(void);
-asm (".pushsection .entry.text, \"ax\"\n"
- ".global _paravirt_nop\n"
- "_paravirt_nop:\n\t"
- ASM_ENDBR
- ASM_RET
- ".size _paravirt_nop, . - _paravirt_nop\n\t"
- ".type _paravirt_nop, @function\n\t"
- ".popsection");
+DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text);
/* stub always returning 0. */
-asm (".pushsection .entry.text, \"ax\"\n"
- ".global paravirt_ret0\n"
- "paravirt_ret0:\n\t"
- ASM_ENDBR
- "xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
- ASM_RET
- ".size paravirt_ret0, . - paravirt_ret0\n\t"
- ".type paravirt_ret0, @function\n\t"
- ".popsection");
-
+DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text);
void __init default_banner(void)
{
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2f314b170c9f..470c128759ea 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -191,13 +191,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
arch_end_context_switch(next_p);
/*
- * Reload esp0 and cpu_current_top_of_stack. This changes
+ * Reload esp0 and pcpu_hot.top_of_stack. This changes
* current_thread_info(). Refresh the SYSENTER configuration in
* case prev or next is vm86.
*/
update_task_stack(next_p);
refresh_sysenter_cs(next);
- this_cpu_write(cpu_current_top_of_stack,
+ this_cpu_write(pcpu_hot.top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
@@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
loadsegment(gs, next->gs);
- this_cpu_write(current_task, next_p);
+ raw_cpu_write(pcpu_hot.current_task, next_p);
switch_fpu_finish();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2f469175be8..4e34b3b68ebd 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -563,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
- this_cpu_read(hardirq_stack_inuse));
+ this_cpu_read(pcpu_hot.hardirq_stack_inuse));
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_prepare(prev_fpu, cpu);
@@ -617,8 +617,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- this_cpu_write(current_task, next_p);
- this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+ raw_cpu_write(pcpu_hot.current_task, next_p);
+ raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
switch_fpu_finish();
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4809c0dc4eb0..4a73351f87f8 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -41,6 +41,7 @@
.text
.align PAGE_SIZE
.code64
+SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
@@ -312,5 +313,5 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
int3
SYM_CODE_END(swap_pages)
- .globl kexec_control_code_size
-.set kexec_control_code_size, . - relocate_kernel
+ .skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
+SYM_CODE_END(relocate_range);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b26123c90b4f..c242dc47e9cb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -23,9 +23,6 @@
#include <asm/cpumask.h>
#include <asm/cpu.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
@@ -172,7 +169,7 @@ void __init setup_per_cpu_areas(void)
for_each_possible_cpu(cpu) {
per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
- per_cpu(cpu_number, cpu) = cpu;
+ per_cpu(pcpu_hot.cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
/*
* Copy data used in early init routines from the
@@ -211,7 +208,7 @@ void __init setup_per_cpu_areas(void)
* area. Reload any changed state for the boot CPU.
*/
if (!cpu)
- switch_to_new_gdt(cpu);
+ switch_gdt_and_percpu_base(cpu);
}
/* indicate the early static arrays will soon be gone */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c295f0e008a3..55cad72715d9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1048,7 +1048,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
- per_cpu(current_task, cpu) = idle;
+ per_cpu(pcpu_hot.current_task, cpu) = idle;
cpu_init_stack_canary(cpu, idle);
/* Initialize the interrupt stack(s) */
@@ -1058,7 +1058,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
- per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
+ per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
@@ -1453,7 +1453,11 @@ void arch_thaw_secondary_cpus_end(void)
void __init native_smp_prepare_boot_cpu(void)
{
int me = smp_processor_id();
- switch_to_new_gdt(me);
+
+ /* SMP handles this from setup_per_cpu_areas() */
+ if (!IS_ENABLED(CONFIG_SMP))
+ switch_gdt_and_percpu_base(me);
+
/* already set me in cpu_online_mask in boot_cpu_init() */
cpumask_set_cpu(me, cpu_callout_mask);
cpu_set_state_online(me);
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index aaaba85d6d7f..2ebc338980bc 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -34,6 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
switch (type) {
case CALL:
+ func = callthunks_translate_call_dest(func);
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
if (func == &__static_call_return0) {
emulate = code;
@@ -52,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
case RET:
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
- code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
else
code = &retinsn;
break;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d1e1679f32cf..d317dc3d06a3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -858,7 +858,7 @@ DEFINE_IDTENTRY_RAW(exc_int3)
*/
asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
{
- struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+ struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
if (regs != eregs)
*regs = *eregs;
return regs;
@@ -876,7 +876,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
* trust it and switch to the current kernel stack
*/
if (ip_within_syscall_gap(regs)) {
- sp = this_cpu_read(cpu_current_top_of_stack);
+ sp = this_cpu_read(pcpu_hot.top_of_stack);
goto sync;
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index c059820dfaea..cdf6c6060170 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -136,6 +136,21 @@ static struct orc_entry null_orc_entry = {
.type = UNWIND_HINT_TYPE_CALL
};
+#ifdef CONFIG_CALL_THUNKS
+static struct orc_entry *orc_callthunk_find(unsigned long ip)
+{
+ if (!is_callthunk((void *)ip))
+ return NULL;
+
+ return &null_orc_entry;
+}
+#else
+static struct orc_entry *orc_callthunk_find(unsigned long ip)
+{
+ return NULL;
+}
+#endif
+
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
.type = UNWIND_HINT_TYPE_CALL,
@@ -189,7 +204,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
- return orc_ftrace_find(ip);
+ orc = orc_ftrace_find(ip);
+ if (orc)
+ return orc;
+
+ return orc_callthunk_find(ip);
}
#ifdef CONFIG_MODULES
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 15f29053cec4..2e0ee14229bf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -132,18 +132,19 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
- ALIGN_ENTRY_TEXT_BEGIN
- ENTRY_TEXT
- ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
- STATIC_CALL_TEXT
- *(.gnu.warning)
-
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
*(.text.__x86.*)
__indirect_thunk_end = .;
#endif
+ STATIC_CALL_TEXT
+
+ ALIGN_ENTRY_TEXT_BEGIN
+ ENTRY_TEXT
+ ALIGN_ENTRY_TEXT_END
+ *(.gnu.warning)
+
} :text =0xcccc
/* End of text section, which should occupy whole number of pages */
@@ -290,6 +291,13 @@ SECTIONS
*(.return_sites)
__return_sites_end = .;
}
+
+ . = ALIGN(8);
+ .call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
+ __call_sites = .;
+ *(.call_sites)
+ __call_sites_end = .;
+ }
#endif
#ifdef CONFIG_X86_KERNEL_IBT
@@ -301,6 +309,15 @@ SECTIONS
}
#endif
+#ifdef CONFIG_FINEIBT
+ . = ALIGN(8);
+ .cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
+ __cfi_sites = .;
+ *(.cfi_sites)
+ __cfi_sites_end = .;
+ }
+#endif
+
/*
* struct alt_inst entries. From the header (alternative.h):
* "Alternative instructions for different CPU types or capabilities"
@@ -493,11 +510,3 @@ INIT_PER_CPU(irq_stack_backing_store);
#endif
#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_KEXEC_CORE
-#include <asm/kexec.h>
-
-. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
- "kexec control code size is too big");
-#endif
-
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 34367dc203f2..8e8295e774f0 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 1e3de0769b81..b5a6d83106bc 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -11,6 +11,7 @@ asm(
".text\n"
".type just_return_func, @function\n"
".globl just_return_func\n"
+ ASM_FUNC_ALIGN
"just_return_func:\n"
ANNOTATE_NOENDBR
ASM_RET
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index b7dfd60243b7..32125224fcca 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -47,8 +47,6 @@ SYM_FUNC_START(__put_user_1)
LOAD_TASK_SIZE_MINUS_N(0)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
- ENDBR
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@@ -56,54 +54,87 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
RET
SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
+
+SYM_FUNC_START(__put_user_nocheck_1)
+ ENDBR
+ ASM_STAC
+2: movb %al,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+ RET
+SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
- ENDBR
ASM_STAC
-2: movw %ax,(%_ASM_CX)
+3: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
+
+SYM_FUNC_START(__put_user_nocheck_2)
+ ENDBR
+ ASM_STAC
+4: movw %ax,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+ RET
+SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
- ENDBR
ASM_STAC
-3: movl %eax,(%_ASM_CX)
+5: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
+
+SYM_FUNC_START(__put_user_nocheck_4)
+ ENDBR
+ ASM_STAC
+6: movl %eax,(%_ASM_CX)
+ xor %ecx,%ecx
+ ASM_CLAC
+ RET
+SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
- ENDBR
ASM_STAC
-4: mov %_ASM_AX,(%_ASM_CX)
+7: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
-5: movl %edx,4(%_ASM_CX)
+8: movl %edx,4(%_ASM_CX)
#endif
xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
+
+SYM_FUNC_START(__put_user_nocheck_8)
+ ENDBR
+ ASM_STAC
+9: mov %_ASM_AX,(%_ASM_CX)
+#ifdef CONFIG_X86_32
+10: movl %edx,4(%_ASM_CX)
+#endif
+ xor %ecx,%ecx
+ ASM_CLAC
+ RET
+SYM_FUNC_END(__put_user_nocheck_8)
EXPORT_SYMBOL(__put_user_nocheck_8)
SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
@@ -117,6 +148,11 @@ SYM_CODE_END(.Lbad_put_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
-#ifdef CONFIG_X86_32
_ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(6b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(7b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(9b, .Lbad_put_user_clac)
+#ifdef CONFIG_X86_32
+ _ASM_EXTABLE_UA(8b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(10b, .Lbad_put_user_clac)
#endif
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 073289a55f84..5f61c65322be 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -5,24 +5,27 @@
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
#include <asm/frame.h>
.section .text.__x86.indirect_thunk
-.macro RETPOLINE reg
+
+.macro POLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
-.Lspec_trap_\@:
- UNWIND_HINT_EMPTY
- pause
- lfence
- jmp .Lspec_trap_\@
+ int3
.Ldo_rop_\@:
mov %\reg, (%_ASM_SP)
UNWIND_HINT_FUNC
+.endm
+
+.macro RETPOLINE reg
+ POLINE \reg
RET
.endm
@@ -52,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
*/
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)
@@ -64,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array)
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)
-#define GEN(reg) EXPORT_THUNK(reg)
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+.macro CALL_THUNK reg
+ .align RETPOLINE_THUNK_SIZE
+
+SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
+
+ CALL_DEPTH_ACCOUNT
+ POLINE \reg
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+.endm
+
+ .align RETPOLINE_THUNK_SIZE
+SYM_CODE_START(__x86_indirect_call_thunk_array)
+
+#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
+ .align RETPOLINE_THUNK_SIZE
+SYM_CODE_END(__x86_indirect_call_thunk_array)
+
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+.macro JUMP_THUNK reg
+ .align RETPOLINE_THUNK_SIZE
+
+SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
+ POLINE \reg
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+.endm
+
+ .align RETPOLINE_THUNK_SIZE
+SYM_CODE_START(__x86_indirect_jump_thunk_array)
+
+#define GEN(reg) JUMP_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+ .align RETPOLINE_THUNK_SIZE
+SYM_CODE_END(__x86_indirect_jump_thunk_array)
+
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+#endif
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
@@ -140,3 +197,37 @@ __EXPORT_THUNK(zen_untrain_ret)
EXPORT_SYMBOL(__x86_return_thunk)
#endif /* CONFIG_RETHUNK */
+
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+
+ .align 64
+SYM_FUNC_START(__x86_return_skl)
+ ANNOTATE_NOENDBR
+ /*
+ * Keep the hotpath in a 16byte I-fetch for the non-debug
+ * case.
+ */
+ CALL_THUNKS_DEBUG_INC_RETS
+ shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
+ jz 1f
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+1:
+ CALL_THUNKS_DEBUG_INC_STUFFS
+ .rept 16
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 2f
+ int3
+2:
+ .endr
+ add $(8*16), %rsp
+
+ CREDIT_CALL_DEPTH
+
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_FUNC_END(__x86_return_skl)
+
+#endif /* CONFIG_CALL_DEPTH_TRACKING */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 36ffe67ad6e5..b808be77635e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
#include <linux/memory.h>
#include <linux/sort.h>
#include <asm/extable.h>
+#include <asm/ftrace.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
@@ -340,6 +341,13 @@ static int emit_call(u8 **pprog, void *func, void *ip)
return emit_patch(pprog, func, ip, 0xE8);
}
+static int emit_rsb_call(u8 **pprog, void *func, void *ip)
+{
+ OPTIMIZER_HIDE_VAR(func);
+ x86_call_depth_emit_accounting(pprog, func);
+ return emit_patch(pprog, func, ip, 0xE8);
+}
+
static int emit_jump(u8 **pprog, void *func, void *ip)
{
return emit_patch(pprog, func, ip, 0xE9);
@@ -417,7 +425,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
- emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+ if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
+ emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
+ else
+ emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else {
EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
@@ -432,7 +443,7 @@ static void emit_return(u8 **pprog, u8 *ip)
u8 *prog = *pprog;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
- emit_jump(&prog, &__x86_return_thunk, ip);
+ emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
if (IS_ENABLED(CONFIG_SLS))
@@ -1514,19 +1525,26 @@ st: if (is_imm8(insn->off))
break;
/* call */
- case BPF_JMP | BPF_CALL:
+ case BPF_JMP | BPF_CALL: {
+ int offs;
+
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
EMIT3_off32(0x48, 0x8B, 0x85,
-round_up(bpf_prog->aux->stack_depth, 8) - 8);
- if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+ if (!imm32)
return -EINVAL;
+ offs = 7 + x86_call_depth_emit_accounting(&prog, func);
} else {
- if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
+ if (!imm32)
return -EINVAL;
+ offs = x86_call_depth_emit_accounting(&prog, func);
}
+ if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ return -EINVAL;
break;
+ }
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
@@ -1917,7 +1935,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
- if (emit_call(&prog, bpf_trampoline_enter(p), prog))
+ if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@ -1938,7 +1956,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
/* call JITed bpf program or interpreter */
- if (emit_call(&prog, p->bpf_func, prog))
+ if (emit_rsb_call(&prog, p->bpf_func, prog))
return -EINVAL;
/*
@@ -1962,7 +1980,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
/* arg3: lea rdx, [rbp - run_ctx_off] */
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
- if (emit_call(&prog, bpf_trampoline_exit(p), prog))
+ if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
return -EINVAL;
*pprog = prog;
@@ -2184,6 +2202,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
prog = image;
EMIT_ENDBR();
+ /*
+ * This is the direct-call trampoline, as such it needs accounting
+ * for the __fentry__ call.
+ */
+ x86_call_depth_emit_accounting(&prog, NULL);
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
@@ -2210,7 +2233,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
- if (emit_call(&prog, __bpf_tramp_enter, prog)) {
+ if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
ret = -EINVAL;
goto cleanup;
}
@@ -2242,7 +2265,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
EMIT2(0xff, 0xd0); /* call *rax */
} else {
/* call original function */
- if (emit_call(&prog, orig_call, prog)) {
+ if (emit_rsb_call(&prog, orig_call, prog)) {
ret = -EINVAL;
goto cleanup;
}
@@ -2286,7 +2309,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
im->ip_epilogue = prog;
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
- if (emit_call(&prog, __bpf_tramp_exit, prog)) {
+ if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
ret = -EINVAL;
goto cleanup;
}
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index e94e0050a583..6f955eb1e163 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -159,7 +159,7 @@ int relocate_restore_code(void)
if (!relocated_restore_code)
return -ENOMEM;
- memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
+ __memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3_pa()) +
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 468e8c8cce01..5b1379662877 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1210,7 +1210,7 @@ static void __init xen_setup_gdt(int cpu)
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
pv_ops.cpu.load_gdt = xen_load_gdt_boot;
- switch_to_new_gdt(cpu);
+ switch_gdt_and_percpu_base(cpu);
pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
pv_ops.cpu.load_gdt = xen_load_gdt;