From da546d6b748e570aa6e44acaa515cfc43baeaa0d Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 3 Sep 2021 00:03:25 +0200 Subject: arm64: dts: qcom: ipq8074: remove USB tx-fifo-resize property tx-fifo-resize is now added by default by the dwc3-qcom driver to the SNPS DWC3 child node. So, lets drop the tx-fifo-resize property from dwc3-qcom nodes as having it there will cause the dwc3-qcom driver to error and abort probe with: [ 1.362938] dwc3-qcom 8af8800.usb: unable to add property [ 1.368405] dwc3-qcom 8af8800.usb: failed to register DWC3 Core, err=-17 Fixes: cefdd52fa045 ("usb: dwc3: dwc3-qcom: Enable tx-fifo-resize property by default") Signed-off-by: Robert Marko Link: https://lore.kernel.org/r/20210902220325.1783567-1-robimarko@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index a620ac0d0b19..db333001df4d 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -487,7 +487,6 @@ interrupts = ; phys = <&qusb_phy_0>, <&usb0_ssphy>; phy-names = "usb2-phy", "usb3-phy"; - tx-fifo-resize; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; @@ -528,7 +527,6 @@ interrupts = ; phys = <&qusb_phy_1>, <&usb1_ssphy>; phy-names = "usb2-phy", "usb3-phy"; - tx-fifo-resize; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; -- cgit v1.2.3-58-ga151 From 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 14 Sep 2021 10:33:51 +0800 Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog Currently if a function ptr in struct_ops has a return value, its caller will get a random return value from it, because the return value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped. So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline to save and return the return value of struct_ops prog if ret_size of the function ptr is greater than 0. Also restricting the flag to be used alone. Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com --- arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++++++++----------- include/linux/bpf.h | 3 ++- kernel/bpf/bpf_struct_ops.c | 7 ++++-- 3 files changed, 47 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0fe6aacef3db..d24a512fd6f3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1744,7 +1744,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size, bool mod_ret) + struct bpf_prog *p, int stack_size, bool save_ret) { u8 *prog = *pprog; u8 *jmp_insn; @@ -1777,11 +1777,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (emit_call(&prog, p->bpf_func, prog)) return -EINVAL; - /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + /* + * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return * of the previous call which is then passed on the stack to * the next BPF program. + * + * BPF_TRAMP_FENTRY trampoline may need to return the return + * value of BPF_PROG_TYPE_STRUCT_OPS prog. */ - if (mod_ret) + if (save_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); /* replace 2 nops with JE insn, since jmp target is known */ @@ -1828,13 +1832,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size) + struct bpf_tramp_progs *tp, int stack_size, + bool save_ret) { int i; u8 *prog = *pprog; for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, + save_ret)) return -EINVAL; } *pprog = prog; @@ -1877,6 +1883,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, return 0; } +static bool is_valid_bpf_tramp_flags(unsigned int flags) +{ + if ((flags & BPF_TRAMP_F_RESTORE_REGS) && + (flags & BPF_TRAMP_F_SKIP_FRAME)) + return false; + + /* + * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, + * and it must be used alone. + */ + if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) && + (flags & ~BPF_TRAMP_F_RET_FENTRY_RET)) + return false; + + return true; +} + /* Example: * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); * its 'struct btf_func_model' will be nr_args=2 @@ -1949,17 +1972,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; u8 **branches = NULL; u8 *prog; + bool save_ret; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ if (nr_args > 6) return -ENOTSUPP; - if ((flags & BPF_TRAMP_F_RESTORE_REGS) && - (flags & BPF_TRAMP_F_SKIP_FRAME)) + if (!is_valid_bpf_tramp_flags(flags)) return -EINVAL; - if (flags & BPF_TRAMP_F_CALL_ORIG) - stack_size += 8; /* room for return value of orig_call */ + /* room for return value of orig_call or fentry prog */ + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; /* room for IP address argument */ @@ -2005,7 +2030,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (fentry->nr_progs) - if (invoke_bpf(m, &prog, fentry, stack_size)) + if (invoke_bpf(m, &prog, fentry, stack_size, + flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; if (fmod_ret->nr_progs) { @@ -2052,7 +2078,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, stack_size)) { + if (invoke_bpf(m, &prog, fexit, stack_size, false)) { ret = -EINVAL; goto cleanup; } @@ -2072,9 +2098,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ret = -EINVAL; goto cleanup; } - /* restore original return value back into RAX */ - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); } + /* restore return value of orig_call or fentry prog back into RAX */ + if (save_ret) + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); EMIT1(0x5B); /* pop rbx */ EMIT1(0xC9); /* leave */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f4c16f19f83e..020a7d5bf470 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -578,11 +578,12 @@ struct btf_func_model { * programs only. Should not be used with normal calls and indirect calls. */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) - /* Store IP address of the caller on the trampoline stack, * so it's available for trampoline's programs. */ #define BPF_TRAMP_F_IP_ARG BIT(3) +/* Return the return value of fentry prog. Only used by bpf_struct_ops. */ +#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d6731c32864e..9abcc33f02cf 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; + u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = st_ops->func_models[i].ret_size > 0 ? + BPF_TRAMP_F_RET_FENTRY_RET : 0; err = arch_prepare_bpf_trampoline(NULL, image, st_map->image + PAGE_SIZE, - &st_ops->func_models[i], 0, - tprogs, NULL); + &st_ops->func_models[i], + flags, tprogs, NULL); if (err < 0) goto reset_unlock; -- cgit v1.2.3-58-ga151 From 37cb28ec7d3a36a5bace7063a3dba633ab110f8b Mon Sep 17 00:00:00 2001 From: Piotr Krysiuk Date: Wed, 15 Sep 2021 17:04:37 +0100 Subject: bpf, mips: Validate conditional branch offsets The conditional branch instructions on MIPS use 18-bit signed offsets allowing for a branch range of 128 KBytes (backward and forward). However, this limit is not observed by the cBPF JIT compiler, and so the JIT compiler emits out-of-range branches when translating certain cBPF programs. A specific example of such a cBPF program is included in the "BPF_MAXINSNS: exec all MSH" test from lib/test_bpf.c that executes anomalous machine code containing incorrect branch offsets under JIT. Furthermore, this issue can be abused to craft undesirable machine code, where the control flow is hijacked to execute arbitrary Kernel code. The following steps can be used to reproduce the issue: # echo 1 > /proc/sys/net/core/bpf_jit_enable # modprobe test_bpf test_name="BPF_MAXINSNS: exec all MSH" This should produce multiple warnings from build_bimm() similar to: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 209 at arch/mips/mm/uasm-mips.c:210 build_insn+0x558/0x590 Micro-assembler field overflow Modules linked in: test_bpf(+) CPU: 0 PID: 209 Comm: modprobe Not tainted 5.14.3 #1 Stack : 00000000 807bb824 82b33c9c 801843c0 00000000 00000004 00000000 63c9b5ee 82b33af4 80999898 80910000 80900000 82fd6030 00000001 82b33a98 82087180 00000000 00000000 80873b28 00000000 000000fc 82b3394c 00000000 2e34312e 6d6d6f43 809a180f 809a1836 6f6d203a 80900000 00000001 82b33bac 80900000 00027f80 00000000 00000000 807bb824 00000000 804ed790 001cc317 00000001 [...] Call Trace: [<80108f44>] show_stack+0x38/0x118 [<807a7aac>] dump_stack_lvl+0x5c/0x7c [<807a4b3c>] __warn+0xcc/0x140 [<807a4c3c>] warn_slowpath_fmt+0x8c/0xb8 [<8011e198>] build_insn+0x558/0x590 [<8011e358>] uasm_i_bne+0x20/0x2c [<80127b48>] build_body+0xa58/0x2a94 [<80129c98>] bpf_jit_compile+0x114/0x1e4 [<80613fc4>] bpf_prepare_filter+0x2ec/0x4e4 [<8061423c>] bpf_prog_create+0x80/0xc4 [] test_bpf_init+0x300/0xba8 [test_bpf] [<8010051c>] do_one_initcall+0x50/0x1d4 [<801c5e54>] do_init_module+0x60/0x220 [<801c8b20>] sys_finit_module+0xc4/0xfc [<801144d0>] syscall_common+0x34/0x58 [...] ---[ end trace a287d9742503c645 ]--- Then the anomalous machine code executes: => 0xc0a18000: addiu sp,sp,-16 0xc0a18004: sw s3,0(sp) 0xc0a18008: sw s4,4(sp) 0xc0a1800c: sw s5,8(sp) 0xc0a18010: sw ra,12(sp) 0xc0a18014: move s5,a0 0xc0a18018: move s4,zero 0xc0a1801c: move s3,zero # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0) 0xc0a18020: lui t6,0x8012 0xc0a18024: ori t4,t6,0x9e14 0xc0a18028: li a1,0 0xc0a1802c: jalr t4 0xc0a18030: move a0,s5 0xc0a18034: bnez v0,0xc0a1ffb8 # incorrect branch offset 0xc0a18038: move v0,zero 0xc0a1803c: andi s4,s3,0xf 0xc0a18040: b 0xc0a18048 0xc0a18044: sll s4,s4,0x2 [...] # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0) 0xc0a1ffa0: lui t6,0x8012 0xc0a1ffa4: ori t4,t6,0x9e14 0xc0a1ffa8: li a1,0 0xc0a1ffac: jalr t4 0xc0a1ffb0: move a0,s5 0xc0a1ffb4: bnez v0,0xc0a1ffb8 # incorrect branch offset 0xc0a1ffb8: move v0,zero 0xc0a1ffbc: andi s4,s3,0xf 0xc0a1ffc0: b 0xc0a1ffc8 0xc0a1ffc4: sll s4,s4,0x2 # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0) 0xc0a1ffc8: lui t6,0x8012 0xc0a1ffcc: ori t4,t6,0x9e14 0xc0a1ffd0: li a1,0 0xc0a1ffd4: jalr t4 0xc0a1ffd8: move a0,s5 0xc0a1ffdc: bnez v0,0xc0a3ffb8 # correct branch offset 0xc0a1ffe0: move v0,zero 0xc0a1ffe4: andi s4,s3,0xf 0xc0a1ffe8: b 0xc0a1fff0 0xc0a1ffec: sll s4,s4,0x2 [...] # epilogue 0xc0a3ffb8: lw s3,0(sp) 0xc0a3ffbc: lw s4,4(sp) 0xc0a3ffc0: lw s5,8(sp) 0xc0a3ffc4: lw ra,12(sp) 0xc0a3ffc8: addiu sp,sp,16 0xc0a3ffcc: jr ra 0xc0a3ffd0: nop To mitigate this issue, we assert the branch ranges for each emit call that could generate an out-of-range branch. Fixes: 36366e367ee9 ("MIPS: BPF: Restore MIPS32 cBPF JIT") Fixes: c6610de353da ("MIPS: net: Add BPF JIT") Signed-off-by: Piotr Krysiuk Signed-off-by: Daniel Borkmann Tested-by: Johan Almbladh Acked-by: Johan Almbladh Cc: Paul Burton Cc: Thomas Bogendoerfer Link: https://lore.kernel.org/bpf/20210915160437.4080-1-piotras@gmail.com --- arch/mips/net/bpf_jit.c | 57 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0af88622c619..cb6d22439f71 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \ func##_positive) +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + static int build_body(struct jit_ctx *ctx) { const struct bpf_prog *prog = ctx->skf; @@ -728,7 +733,10 @@ load_common: /* Load return register on DS for failures */ emit_reg_move(r_ret, r_zero, ctx); /* Return with error */ - emit_b(b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); emit_nop(ctx); break; case BPF_LD | BPF_W | BPF_IND: @@ -775,8 +783,10 @@ load_ind: emit_jalr(MIPS_R_RA, r_s0, ctx); emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ /* Check the error value */ - emit_bcond(MIPS_COND_NE, r_ret, 0, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); emit_reg_move(r_ret, r_zero, ctx); /* We are good */ /* X <- P[1:K] & 0xf */ @@ -855,8 +865,10 @@ load_ind: /* A /= X */ ctx->flags |= SEEN_X | SEEN_A; /* Check if r_X is zero */ - emit_bcond(MIPS_COND_EQ, r_X, r_zero, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_div(r_A, r_X, ctx); break; @@ -864,8 +876,10 @@ load_ind: /* A %= X */ ctx->flags |= SEEN_X | SEEN_A; /* Check if r_X is zero */ - emit_bcond(MIPS_COND_EQ, r_X, r_zero, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_mod(r_A, r_X, ctx); break; @@ -926,7 +940,10 @@ load_ind: break; case BPF_JMP | BPF_JA: /* pc += K */ - emit_b(b_imm(i + k + 1, ctx), ctx); + b_off = b_imm(i + k + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); emit_nop(ctx); break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -1056,12 +1073,16 @@ jmp_cmp: break; case BPF_RET | BPF_A: ctx->flags |= SEEN_A; - if (i != prog->len - 1) + if (i != prog->len - 1) { /* * If this is not the last instruction * then jump to the epilogue */ - emit_b(b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + } emit_reg_move(r_ret, r_A, ctx); /* delay slot */ break; case BPF_RET | BPF_K: @@ -1075,7 +1096,10 @@ jmp_cmp: * If this is not the last instruction * then jump to the epilogue */ - emit_b(b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); emit_nop(ctx); } break; @@ -1133,8 +1157,10 @@ jmp_cmp: /* Load *dev pointer */ emit_load_ptr(r_s0, r_skb, off, ctx); /* error (0) in the delay slot */ - emit_bcond(MIPS_COND_EQ, r_s0, r_zero, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); emit_reg_move(r_ret, r_zero, ctx); if (code == (BPF_ANC | SKF_AD_IFINDEX)) { BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); @@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp) /* Generate the actual JIT code */ build_prologue(&ctx); - build_body(&ctx); + if (build_body(&ctx)) { + module_memfree(ctx.target); + goto out; + } build_epilogue(&ctx); /* Update the icache */ -- cgit v1.2.3-58-ga151 From 1511df6f5e9ef32826f20db2ee81f8527154dc14 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 7 Sep 2021 11:58:59 +0200 Subject: s390/bpf: Fix branch shortening during codegen pass EMIT6_PCREL() macro assumes that the previous pass generated 6 bytes of code, which is not the case if branch shortening took place. Fix by using jit->prg, like all the other EMIT6_PCREL_*() macros. Reported-by: Johan Almbladh Fixes: 4e9b4a6883dd ("s390/bpf: Use relative long branches") Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 88419263a89a..c3bd630e9b43 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -248,8 +248,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ - /* Branch instruction needs 6 bytes */ \ - int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\ + int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ -- cgit v1.2.3-58-ga151 From 6e61dc9da0b7a0d91d57c2e20b5ea4fd2d4e7e53 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 7 Sep 2021 13:41:16 +0200 Subject: s390/bpf: Fix 64-bit subtraction of the -0x80000000 constant The JIT uses agfi for subtracting constants, but -(-0x80000000) cannot be represented as a 32-bit signed binary integer. Fix by using algfi in this particular case. Reported-by: Johan Almbladh Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Reviewed-by: Heiko Carstens Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c3bd630e9b43..245f98d5f690 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -794,8 +794,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ if (!imm) break; - /* agfi %dst,-imm */ - EMIT6_IMM(0xc2080000, dst_reg, -imm); + if (imm == -0x80000000) { + /* algfi %dst,0x80000000 */ + EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000); + } else { + /* agfi %dst,-imm */ + EMIT6_IMM(0xc2080000, dst_reg, -imm); + } break; /* * BPF_MUL -- cgit v1.2.3-58-ga151 From db7bee653859ef7179be933e7d1384644f795f26 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 6 Sep 2021 15:04:14 +0200 Subject: s390/bpf: Fix optimizing out zero-extensions Currently the JIT completely removes things like `reg32 += 0`, however, the BPF_ALU semantics requires the target register to be zero-extended in such cases. Fix by optimizing out only the arithmetic operation, but not the subsequent zero-extension. Reported-by: Johan Almbladh Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Reviewed-by: Heiko Carstens Signed-off-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 58 +++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 245f98d5f690..840d8594437d 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -760,10 +760,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9080000, dst_reg, src_reg); break; case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */ - if (!imm) - break; - /* alfi %dst,imm */ - EMIT6_IMM(0xc20b0000, dst_reg, imm); + if (imm != 0) { + /* alfi %dst,imm */ + EMIT6_IMM(0xc20b0000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */ @@ -785,10 +785,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9090000, dst_reg, src_reg); break; case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */ - if (!imm) - break; - /* alfi %dst,-imm */ - EMIT6_IMM(0xc20b0000, dst_reg, -imm); + if (imm != 0) { + /* alfi %dst,-imm */ + EMIT6_IMM(0xc20b0000, dst_reg, -imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ @@ -815,10 +815,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb90c0000, dst_reg, src_reg); break; case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */ - if (imm == 1) - break; - /* msfi %r5,imm */ - EMIT6_IMM(0xc2010000, dst_reg, imm); + if (imm != 1) { + /* msfi %r5,imm */ + EMIT6_IMM(0xc2010000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */ @@ -871,6 +871,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (BPF_OP(insn->code) == BPF_MOD) /* lhgi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); + else + EMIT_ZERO(dst_reg); break; } /* lhi %w0,0 */ @@ -1003,10 +1005,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9820000, dst_reg, src_reg); break; case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */ - if (!imm) - break; - /* xilf %dst,imm */ - EMIT6_IMM(0xc0070000, dst_reg, imm); + if (imm != 0) { + /* xilf %dst,imm */ + EMIT6_IMM(0xc0070000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */ @@ -1037,10 +1039,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */ - if (imm == 0) - break; - /* sll %dst,imm(%r0) */ - EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sll %dst,imm(%r0) */ + EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */ @@ -1062,10 +1064,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */ - if (imm == 0) - break; - /* srl %dst,imm(%r0) */ - EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* srl %dst,imm(%r0) */ + EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */ @@ -1087,10 +1089,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst >> imm */ - if (imm == 0) - break; - /* sra %dst,imm(%r0) */ - EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sra %dst,imm(%r0) */ + EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */ -- cgit v1.2.3-58-ga151 From 4403f8062abecf24794e0fd3a3e424cc63ba6662 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 16 Sep 2021 17:05:29 +0200 Subject: xen/x86: drop redundant zeroing from cpu_initialize_context() Just after having obtained the pointer from kzalloc() there's no reason at all to set part of the area to all zero yet another time. Similarly there's no point explicitly clearing "ldt_ents". Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/14881835-a48e-29fa-0870-e177b10fcf65@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/smp_pv.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 96afadf9878e..7ed56c6075b0 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -290,8 +290,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_rw(cpu); - memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - /* * Bring up the CPU in cpu_bringup_and_idle() with the stack * pointing just below where pt_regs would be if it were a normal @@ -308,8 +306,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) xen_copy_trap_info(ctxt->trap_ctxt); - ctxt->ldt_ents = 0; - BUG_ON((unsigned long)gdt & ~PAGE_MASK); gdt_mfn = arbitrary_virt_to_mfn(gdt); -- cgit v1.2.3-58-ga151 From 8e1034a526652f265ed993fab7f659eb8ae4b6f0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:49:04 +0200 Subject: xen/pci-swiotlb: reduce visibility of symbols xen_swiotlb and pci_xen_swiotlb_init() are only used within the file defining them, so make them static and remove the stubs. Otoh pci_xen_swiotlb_detect() has a use (as function pointer) from the main pci-swiotlb.c file - convert its stub to a #define to NULL. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/aef5fc33-9c02-4df0-906a-5c813142e13c@suse.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/swiotlb-xen.h | 6 +----- arch/x86/xen/pci-swiotlb-xen.c | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 6b56d0d45d15..66b4ddde7743 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -3,14 +3,10 @@ #define _ASM_X86_SWIOTLB_XEN_H #ifdef CONFIG_SWIOTLB_XEN -extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); -extern void __init pci_xen_swiotlb_init(void); extern int pci_xen_swiotlb_init_late(void); #else -#define xen_swiotlb (0) -static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -static inline void __init pci_xen_swiotlb_init(void) { } +#define pci_xen_swiotlb_detect NULL static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 54f9aa7e8457..46df59aeaa06 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -18,7 +18,7 @@ #endif #include -int xen_swiotlb __read_mostly; +static int xen_swiotlb __read_mostly; /* * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary @@ -56,7 +56,7 @@ int __init pci_xen_swiotlb_detect(void) return xen_swiotlb; } -void __init pci_xen_swiotlb_init(void) +static void __init pci_xen_swiotlb_init(void) { if (xen_swiotlb) { xen_swiotlb_init_early(); -- cgit v1.2.3-58-ga151 From 794d5b8a497ff053f56856472e2fae038fa761aa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 17 Sep 2021 12:50:38 +0200 Subject: swiotlb-xen: this is PV-only on x86 The code is unreachable for HVM or PVH, and it also makes little sense in auto-translated environments. On Arm, with xen_{create,destroy}_contiguous_region() both being stubs, I have a hard time seeing what good the Xen specific variant does - the generic one ought to be fine for all purposes there. Still Arm code explicitly references symbols here, so the code will continue to be included there. Instead of making PCI_XEN's "select" conditional, simply drop it - SWIOTLB_XEN will be available unconditionally in the PV case anyway, and is - as explained above - dead code in non-PV environments. This in turn allows dropping the stubs for xen_{create,destroy}_contiguous_region(), the former of which was broken anyway - it failed to set the DMA handle output. Signed-off-by: Jan Beulich Reviewed-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/5947b8ae-fdc7-225c-4838-84712265fc1e@suse.com Signed-off-by: Juergen Gross --- arch/x86/Kconfig | 1 - drivers/xen/Kconfig | 1 + include/xen/xen-ops.h | 12 ------------ 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 88fb922c23a0..a71ced4c711f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2605,7 +2605,6 @@ config PCI_OLPC config PCI_XEN def_bool y depends on PCI && XEN - select SWIOTLB_XEN config MMCONF_FAM10H def_bool y diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a37eb52fb401..22f5aff0c136 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -177,6 +177,7 @@ config XEN_GRANT_DMA_ALLOC config SWIOTLB_XEN def_bool y + depends on XEN_PV || ARM || ARM64 select DMA_OPS select SWIOTLB diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 39a5580f8feb..db28e79b77ee 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -46,19 +46,7 @@ extern unsigned long *xen_contiguous_bitmap; int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); - void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); -#else -static inline int xen_create_contiguous_region(phys_addr_t pstart, - unsigned int order, - unsigned int address_bits, - dma_addr_t *dma_handle) -{ - return 0; -} - -static inline void xen_destroy_contiguous_region(phys_addr_t pstart, - unsigned int order) { } #endif #if defined(CONFIG_XEN_PV) -- cgit v1.2.3-58-ga151 From d8b1e10a2b8efaf71d151aa756052fbf2f3b6d57 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Sep 2021 10:56:32 -0700 Subject: sparc64: fix pci_iounmap() when CONFIG_PCI is not set Guenter reported [1] that the pci_iounmap() changes remain problematic, with sparc64 allnoconfig and tinyconfig still not building due to the header file changes and confusion with the arch-specific pci_iounmap() implementation. I'm pretty convinced that sparc should just use GENERIC_IOMAP instead of doing its own thing, since it turns out that the sparc64 version of pci_iounmap() is somewhat buggy (see [2]). But in the meantime, this just fixes the build by avoiding the trivial re-definition of the empty case. Link: https://lore.kernel.org/lkml/20210920134424.GA346531@roeck-us.net/ [1] Link: https://lore.kernel.org/lkml/CAHk-=wgheheFx9myQyy5osh79BAazvmvYURAtub2gQtMvLrhqQ@mail.gmail.com/ [2] Reported-by: Guenter Roeck Cc: David Miller Signed-off-by: Linus Torvalds --- arch/sparc/lib/iomap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c9da9f139694..f3a8cd491ce0 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif -- cgit v1.2.3-58-ga151 From d4ffd5df9d18031b6a53f934388726775b4452d3 Mon Sep 17 00:00:00 2001 From: Jiashuo Liang Date: Fri, 30 Jul 2021 11:01:52 +0800 Subject: x86/fault: Fix wrong signal when vsyscall fails with pkey The function __bad_area_nosemaphore() calls kernelmode_fixup_or_oops() with the parameter @signal being actually @pkey, which will send a signal numbered with the argument in @pkey. This bug can be triggered when the kernel fails to access user-given memory pages that are protected by a pkey, so it can go down the do_user_addr_fault() path and pass the !user_mode() check in __bad_area_nosemaphore(). Most cases will simply run the kernel fixup code to make an -EFAULT. But when another condition current->thread.sig_on_uaccess_err is met, which is only used to emulate vsyscall, the kernel will generate the wrong signal. Add a new parameter @pkey to kernelmode_fixup_or_oops() to fix this. [ bp: Massage commit message, fix build error as reported by the 0day bot: https://lkml.kernel.org/r/202109202245.APvuT8BX-lkp@intel.com ] Fixes: 5042d40a264c ("x86/fault: Bypass no_context() for implicit kernel faults from usermode") Reported-by: kernel test robot Signed-off-by: Jiashuo Liang Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Link: https://lkml.kernel.org/r/20210730030152.249106-1-liangjs@pku.edu.cn --- arch/x86/include/asm/pkeys.h | 2 -- arch/x86/mm/fault.c | 26 ++++++++++++++++++-------- include/linux/pkeys.h | 2 ++ 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 5c7bcaa79623..1d5f14aff5f6 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H -#define ARCH_DEFAULT_PKEY 0 - /* * If more than 16 keys are ever supported, a thorough audit * will be necessary to ensure that the types that store key diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b2eefdefc108..84a2c8c4af73 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -710,7 +710,8 @@ oops: static noinline void kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int signal, int si_code) + unsigned long address, int signal, int si_code, + u32 pkey) { WARN_ON_ONCE(user_mode(regs)); @@ -735,8 +736,12 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, set_signal_archinfo(address, error_code); - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); + if (si_code == SEGV_PKUERR) { + force_sig_pkuerr((void __user *)address, pkey); + } else { + /* XXX: hwpoison faults will set the wrong code. */ + force_sig_fault(signal, si_code, (void __user *)address); + } } /* @@ -798,7 +803,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, struct task_struct *tsk = current; if (!user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGSEGV, si_code, pkey); return; } @@ -930,7 +936,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, { /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } @@ -1396,7 +1403,8 @@ good_area: */ if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, - SIGBUS, BUS_ADRERR); + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); return; } @@ -1416,7 +1424,8 @@ good_area: return; if (fatal_signal_pending(current) && !user_mode(regs)) { - kernelmode_fixup_or_oops(regs, error_code, address, 0, 0); + kernelmode_fixup_or_oops(regs, error_code, address, + 0, 0, ARCH_DEFAULT_PKEY); return; } @@ -1424,7 +1433,8 @@ good_area: /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, - SIGSEGV, SEGV_MAPERR); + SIGSEGV, SEGV_MAPERR, + ARCH_DEFAULT_PKEY); return; } diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 6beb26b7151d..86be8bf27b41 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,6 +4,8 @@ #include +#define ARCH_DEFAULT_PKEY 0 + #ifdef CONFIG_ARCH_HAS_PKEYS #include #else /* ! CONFIG_ARCH_HAS_PKEYS */ -- cgit v1.2.3-58-ga151 From 0594c58161b6e0f3da8efa9c6e3d4ba52b652717 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 20 Sep 2021 18:15:11 +0200 Subject: xen/x86: fix PV trap handling on secondary processors The initial observation was that in PV mode under Xen 32-bit user space didn't work anymore. Attempts of system calls ended in #GP(0x402). All of the sudden the vector 0x80 handler was not in place anymore. As it turns out up to 5.13 redundant initialization did occur: Once from cpu_initialize_context() (through its VCPUOP_initialise hypercall) and a 2nd time while each CPU was brought fully up. This 2nd initialization is now gone, uncovering that the 1st one was flawed: Unlike for the set_trap_table hypercall, a full virtual IDT needs to be specified here; the "vector" fields of the individual entries are of no interest. With many (kernel) IDT entries still(?) (i.e. at that point at least) empty, the syscall vector 0x80 ended up in slot 0x20 of the virtual IDT, thus becoming the domain's handler for vector 0x20. Make xen_convert_trap_info() fit for either purpose, leveraging the fact that on the xen_copy_trap_info() path the table starts out zero-filled. This includes moving out the writing of the sentinel, which would also have lead to a buffer overrun in the xen_copy_trap_info() case if all (kernel) IDT entries were populated. Convert the writing of the sentinel to clearing of the entire table entry rather than just the address field. (I didn't bother trying to identify the commit which uncovered the issue in 5.14; the commit named below is the one which actually introduced the bad code.) Fixes: f87e4cac4f4e ("xen: SMP guest support") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/7a266932-092e-b68f-f2bb-1473b61adc6e@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 349f780a1567..6e0d0754f94f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -755,8 +755,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) preempt_enable(); } -static void xen_convert_trap_info(const struct desc_ptr *desc, - struct trap_info *traps) +static unsigned xen_convert_trap_info(const struct desc_ptr *desc, + struct trap_info *traps, bool full) { unsigned in, out, count; @@ -766,17 +766,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, for (in = out = 0; in < count; in++) { gate_desc *entry = (gate_desc *)(desc->address) + in; - if (cvt_gate_to_trap(in, entry, &traps[out])) + if (cvt_gate_to_trap(in, entry, &traps[out]) || full) out++; } - traps[out].address = 0; + + return out; } void xen_copy_trap_info(struct trap_info *traps) { const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); - xen_convert_trap_info(desc, traps); + xen_convert_trap_info(desc, traps, true); } /* Load a new IDT into Xen. In principle this can be per-CPU, so we @@ -786,6 +787,7 @@ static void xen_load_idt(const struct desc_ptr *desc) { static DEFINE_SPINLOCK(lock); static struct trap_info traps[257]; + unsigned out; trace_xen_cpu_load_idt(desc); @@ -793,7 +795,8 @@ static void xen_load_idt(const struct desc_ptr *desc) memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); - xen_convert_trap_info(desc, traps); + out = xen_convert_trap_info(desc, traps, false); + memset(&traps[out], 0, sizeof(traps[0])); xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) -- cgit v1.2.3-58-ga151 From 8aa83e6395ce047a506f0b16edca45f36c1ae7f8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 20 Sep 2021 14:04:21 +0200 Subject: x86/setup: Call early_reserve_memory() earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit in Fixes introduced early_reserve_memory() to do all needed initial memblock_reserve() calls in one function. Unfortunately, the call of early_reserve_memory() is done too late for Xen dom0, as in some cases a Xen hook called by e820__memory_setup() will need those memory reservations to have happened already. Move the call of early_reserve_memory() before the call of e820__memory_setup() in order to avoid such problems. Fixes: a799c2bd29d1 ("x86/setup: Consolidate early memory reservations") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Tested-by: Marek Marczykowski-Górecki Tested-by: Nathan Chancellor Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210920120421.29276-1-jgross@suse.com --- arch/x86/kernel/setup.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 79f164141116..40ed44ead063 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -830,6 +830,20 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + /* + * Do some memory reservations *before* memory is added to memblock, so + * memblock allocations won't overwrite it. + * + * After this point, everything still needed from the boot loader or + * firmware or kernel text should be early reserved or marked not RAM in + * e820. All other memory is free game. + * + * This call needs to happen before e820__memory_setup() which calls the + * xen_memory_setup() on Xen dom0 which relies on the fact that those + * early reservations have happened already. + */ + early_reserve_memory(); + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; e820__memory_setup(); parse_setup_data(); @@ -876,18 +890,6 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - /* - * Do some memory reservations *before* memory is added to - * memblock, so memblock allocations won't overwrite it. - * Do it after early param, so we could get (unlikely) panic from - * serial. - * - * After this point everything still needed from the boot loader or - * firmware or kernel text should be early reserved or marked not - * RAM in e820. All other memory is free game. - */ - early_reserve_memory(); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux -- cgit v1.2.3-58-ga151 From 8c8a3b5bd960cd88f7655b5251dc28741e11f139 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 15 Sep 2021 12:03:35 -0700 Subject: arm64: add MTE supported check to thread switching and syscall entry/exit This lets us avoid doing unnecessary work on hardware that does not support MTE, and will allow us to freely use MTE instructions in the code called by mte_thread_switch(). Since this would mean that we do a redundant check in mte_check_tfsr_el1(), remove it and add two checks now required in its callers. This also avoids an unnecessary DSB+ISB sequence on the syscall exit path for hardware not supporting MTE. Fixes: 65812c6921cc ("arm64: mte: Enable async tag check fault") Cc: # 5.13.x Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I02fd000d1ef2c86c7d2952a7f099b254ec227a5d Link: https://lore.kernel.org/r/20210915190336.398390-1-pcc@google.com [catalin.marinas@arm.com: adjust the commit log slightly] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 6 ++++++ arch/arm64/kernel/mte.c | 10 ++++------ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 3f93b9e0b339..02511650cffe 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -99,11 +99,17 @@ void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { + if (!system_supports_mte()) + return; + mte_check_tfsr_el1(); } static inline void mte_check_tfsr_exit(void) { + if (!system_supports_mte()) + return; + /* * The asynchronous faults are sync'ed automatically with * TFSR_EL1 on kernel entry but for exit an explicit dsb() diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 9d314a3bad3b..e5e801bc5312 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -142,12 +142,7 @@ void mte_enable_kernel_async(void) #ifdef CONFIG_KASAN_HW_TAGS void mte_check_tfsr_el1(void) { - u64 tfsr_el1; - - if (!system_supports_mte()) - return; - - tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); + u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) { /* @@ -199,6 +194,9 @@ void mte_thread_init_user(void) void mte_thread_switch(struct task_struct *next) { + if (!system_supports_mte()) + return; + mte_update_sctlr_user(next); /* -- cgit v1.2.3-58-ga151 From 59a68d4138086c015ab8241c3267eec5550fbd44 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 17 Sep 2021 15:59:30 +0100 Subject: arm64: Mitigate MTE issues with str{n}cmp() As with strlen(), the patches importing the updated str{n}cmp() implementations were originally developed and tested before the advent of CONFIG_KASAN_HW_TAGS, and have subsequently revealed not to be MTE-safe. Since in-kernel MTE is still a rather niche case, let it temporarily fall back to the generic C versions for correctness until we can figure out the best fix. Fixes: 758602c04409 ("arm64: Import latest version of Cortex Strings' strcmp") Fixes: 020b199bc70d ("arm64: Import latest version of Cortex Strings' strncmp") Cc: # 5.14.x Reported-by: Branislav Rankov Signed-off-by: Robin Murphy Acked-by: Mark Rutland Link: https://lore.kernel.org/r/34dc4d12eec0adae49b0ac927df642ed10089d40.1631890770.git.robin.murphy@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 5 +++++ arch/arm64/include/asm/string.h | 2 ++ arch/arm64/lib/strcmp.S | 2 +- arch/arm64/lib/strncmp.S | 2 +- 4 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 89faca0e740d..bfa58409a4d4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -525,6 +525,11 @@ alternative_endif #define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) #endif +#ifdef CONFIG_KASAN_HW_TAGS +#define EXPORT_SYMBOL_NOHWKASAN(name) +#else +#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name) +#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 3a3264ff47b9..95f7686b728d 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -12,11 +12,13 @@ extern char *strrchr(const char *, int c); #define __HAVE_ARCH_STRCHR extern char *strchr(const char *, int c); +#ifndef CONFIG_KASAN_HW_TAGS #define __HAVE_ARCH_STRCMP extern int strcmp(const char *, const char *); #define __HAVE_ARCH_STRNCMP extern int strncmp(const char *, const char *, __kernel_size_t); +#endif #define __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index d7bee210a798..83bcad72ec97 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -173,4 +173,4 @@ L(done): ret SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOKASAN(strcmp) +EXPORT_SYMBOL_NOHWKASAN(strcmp) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index 48d44f7fddb1..e42bcfcd37e6 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -258,4 +258,4 @@ L(ret0): ret SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOKASAN(strncmp) +EXPORT_SYMBOL_NOHWKASAN(strncmp) -- cgit v1.2.3-58-ga151 From d2b59bd4b06d84a4eadb520b0f71c62fe8ec0a62 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 21 Sep 2021 16:52:17 +0200 Subject: s390/qeth: fix deadlock during failing recovery Commit 0b9902c1fcc5 ("s390/qeth: fix deadlock during recovery") removed taking discipline_mutex inside qeth_do_reset(), fixing potential deadlocks. An error path was missed though, that still takes discipline_mutex and thus has the original deadlock potential. Intermittent deadlocks were seen when a qeth channel path is configured offline, causing a race between qeth_do_reset and ccwgroup_remove. Call qeth_set_offline() directly in the qeth_do_reset() error case and then a new variant of ccwgroup_set_offline(), without taking discipline_mutex. Fixes: b41b554c1ee7 ("s390/qeth: fix locking for discipline setup / removal") Signed-off-by: Alexandra Winter Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- arch/s390/include/asm/ccwgroup.h | 2 +- drivers/s390/cio/ccwgroup.c | 10 ++++++++-- drivers/s390/net/qeth_core_main.c | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 36dbf5043fc0..aa995d91cd1d 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -55,7 +55,7 @@ int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, int num_devices, const char *buf); extern int ccwgroup_set_online(struct ccwgroup_device *gdev); -extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); +int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv); extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 2ec741106cb6..f0538609dfe4 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -77,12 +77,13 @@ EXPORT_SYMBOL(ccwgroup_set_online); /** * ccwgroup_set_offline() - disable a ccwgroup device * @gdev: target ccwgroup device + * @call_gdrv: Call the registered gdrv set_offline function * * This function attempts to put the ccwgroup device into the offline state. * Returns: * %0 on success and a negative error value on failure. */ -int ccwgroup_set_offline(struct ccwgroup_device *gdev) +int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv) { struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); int ret = -EINVAL; @@ -91,11 +92,16 @@ int ccwgroup_set_offline(struct ccwgroup_device *gdev) return -EAGAIN; if (gdev->state == CCWGROUP_OFFLINE) goto out; + if (!call_gdrv) { + ret = 0; + goto offline; + } if (gdrv->set_offline) ret = gdrv->set_offline(gdev); if (ret) goto out; +offline: gdev->state = CCWGROUP_OFFLINE; out: atomic_set(&gdev->onoff, 0); @@ -124,7 +130,7 @@ static ssize_t ccwgroup_online_store(struct device *dev, if (value == 1) ret = ccwgroup_set_online(gdev); else if (value == 0) - ret = ccwgroup_set_offline(gdev); + ret = ccwgroup_set_offline(gdev, true); else ret = -EINVAL; out: diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9f26706051e5..e9807d2996a9 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5514,7 +5514,8 @@ static int qeth_do_reset(void *data) dev_info(&card->gdev->dev, "Device successfully recovered!\n"); } else { - ccwgroup_set_offline(card->gdev); + qeth_set_offline(card, disc, true); + ccwgroup_set_offline(card->gdev, false); dev_warn(&card->gdev->dev, "The qeth device driver failed to recover an error on the device\n"); } -- cgit v1.2.3-58-ga151 From d81ff5fe14a950f53e2833cfa196e7bb3fd5d4e3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 10 Sep 2021 15:33:32 -0700 Subject: x86/asm: Fix SETZ size enqcmds() build failure When building under GCC 4.9 and 5.5: arch/x86/include/asm/special_insns.h: Assembler messages: arch/x86/include/asm/special_insns.h:286: Error: operand size mismatch for `setz' Change the type to "bool" for condition code arguments, as documented. Fixes: 7f5933f81bd8 ("x86/asm: Add an enqcmds() wrapper for the ENQCMDS instruction") Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210910223332.3224851-1-keescook@chromium.org --- arch/x86/include/asm/special_insns.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index f3fbb84ff8a7..68c257a3de0d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -275,7 +275,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; - int zf; + bool zf; /* * ENQCMDS %(rdx), rax -- cgit v1.2.3-58-ga151 From 22b70e6f2da0a4c8b1421b00cfc3016bc9d4d9d4 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Thu, 23 Sep 2021 08:50:02 -0600 Subject: arm64: Restore forced disabling of KPTI on ThunderX A noted side-effect of commit 0c6c2d3615ef ("arm64: Generate cpucaps.h") is that cpucaps are now sorted, changing the enumeration order. This assumed no dependencies between cpucaps, which turned out not to be true in one case. UNMAP_KERNEL_AT_EL0 currently needs to be processed after WORKAROUND_CAVIUM_27456. ThunderX systems are incompatible with KPTI, so unmap_kernel_at_el0() bails if WORKAROUND_CAVIUM_27456 is set. But because of the sorting, WORKAROUND_CAVIUM_27456 will not yet have been considered when unmap_kernel_at_el0() checks for it, so the kernel tries to run w/ KPTI - and quickly falls over. Because all ThunderX implementations have homogeneous CPUs, we can remove this dependency by just checking the current CPU for the erratum. Fixes: 0c6c2d3615ef ("arm64: Generate cpucaps.h") Cc: # 5.13.x Signed-off-by: dann frazier Suggested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Mark Brown Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20210923145002.3394558-1-dann.frazier@canonical.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f8a3067d10c6..6ec7036ef7e1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1526,9 +1526,13 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, /* * For reasons that aren't entirely clear, enabling KPTI on Cavium * ThunderX leads to apparent I-cache corruption of kernel text, which - * ends as well as you might imagine. Don't even try. + * ends as well as you might imagine. Don't even try. We cannot rely + * on the cpus_have_*cap() helpers here to detect the CPU erratum + * because cpucap detection order may change. However, since we know + * affected CPUs are always in a homogeneous configuration, it is + * safe to rely on this_cpu_has_cap() here. */ - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) { + if (this_cpu_has_cap(ARM64_WORKAROUND_CAVIUM_27456)) { str = "ARM64_WORKAROUND_CAVIUM_27456"; __kpti_forced = -1; } -- cgit v1.2.3-58-ga151 From 12064c1768439fa0882547010afae6b52aafa7af Mon Sep 17 00:00:00 2001 From: Jia He Date: Thu, 23 Sep 2021 11:35:57 +0800 Subject: Revert "ACPI: Add memory semantics to acpi_os_map_memory()" This reverts commit 437b38c51162f8b87beb28a833c4d5dc85fa864e. The memory semantics added in commit 437b38c51162 causes SystemMemory Operation region, whose address range is not described in the EFI memory map to be mapped as NormalNC memory on arm64 platforms (through acpi_os_map_memory() in acpi_ex_system_memory_space_handler()). This triggers the following abort on an ARM64 Ampere eMAG machine, because presumably the physical address range area backing the Opregion does not support NormalNC memory attributes driven on the bus. Internal error: synchronous external abort: 96000410 [#1] SMP Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0+ #462 Hardware name: MiTAC RAPTOR EV-883832-X3-0001/RAPTOR, BIOS 0.14 02/22/2019 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [...snip...] Call trace: acpi_ex_system_memory_space_handler+0x26c/0x2c8 acpi_ev_address_space_dispatch+0x228/0x2c4 acpi_ex_access_region+0x114/0x268 acpi_ex_field_datum_io+0x128/0x1b8 acpi_ex_extract_from_field+0x14c/0x2ac acpi_ex_read_data_from_field+0x190/0x1b8 acpi_ex_resolve_node_to_value+0x1ec/0x288 acpi_ex_resolve_to_value+0x250/0x274 acpi_ds_evaluate_name_path+0xac/0x124 acpi_ds_exec_end_op+0x90/0x410 acpi_ps_parse_loop+0x4ac/0x5d8 acpi_ps_parse_aml+0xe0/0x2c8 acpi_ps_execute_method+0x19c/0x1ac acpi_ns_evaluate+0x1f8/0x26c acpi_ns_init_one_device+0x104/0x140 acpi_ns_walk_namespace+0x158/0x1d0 acpi_ns_initialize_devices+0x194/0x218 acpi_initialize_objects+0x48/0x50 acpi_init+0xe0/0x498 If the Opregion address range is not present in the EFI memory map there is no way for us to determine the memory attributes to use to map it - defaulting to NormalNC does not work (and it is not correct on a memory region that may have read side-effects) and therefore commit 437b38c51162 should be reverted, which means reverting back to the original behavior whereby address ranges that are mapped using acpi_os_map_memory() default to the safe devicenGnRnE attributes on ARM64 if the mapped address range is not defined in the EFI memory map. Fixes: 437b38c51162 ("ACPI: Add memory semantics to acpi_os_map_memory()") Signed-off-by: Jia He Acked-by: Lorenzo Pieralisi Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- arch/arm64/include/asm/acpi.h | 3 --- arch/arm64/kernel/acpi.c | 19 +++---------------- drivers/acpi/osl.c | 23 +++++++---------------- include/acpi/acpi_io.h | 8 -------- 4 files changed, 10 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 7535dc7cc5aa..bd68e1b7f29f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -50,9 +50,6 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap -void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size); -#define acpi_os_memmap acpi_os_memmap - typedef u64 phys_cpuid_t; #define PHYS_CPUID_INVALID INVALID_HWID diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 1c9c2f7a1c04..f3851724fe35 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -273,8 +273,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_DEVICE_nGnRnE); } -static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, - acpi_size size, bool memory) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { efi_memory_desc_t *md, *region = NULL; pgprot_t prot; @@ -300,11 +299,9 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, * It is fine for AML to remap regions that are not represented in the * EFI memory map at all, as it only describes normal memory, and MMIO * regions that require a virtual mapping to make them accessible to - * the EFI runtime services. Determine the region default - * attributes by checking the requested memory semantics. + * the EFI runtime services. */ - prot = memory ? __pgprot(PROT_NORMAL_NC) : - __pgprot(PROT_DEVICE_nGnRnE); + prot = __pgprot(PROT_DEVICE_nGnRnE); if (region) { switch (region->type) { case EFI_LOADER_CODE: @@ -364,16 +361,6 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys, return __ioremap(phys, size, prot); } -void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_ioremap(phys, size, false); -} - -void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_ioremap(phys, size, true); -} - /* * Claim Synchronous External Aborts as a firmware first notification. * diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index a43f1521efe6..45c5c0e45e33 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -284,8 +284,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) #define should_use_kmap(pfn) page_is_ram(pfn) #endif -static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz, - bool memory) +static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz) { unsigned long pfn; @@ -295,8 +294,7 @@ static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz, return NULL; return (void __iomem __force *)kmap(pfn_to_page(pfn)); } else - return memory ? acpi_os_memmap(pg_off, pg_sz) : - acpi_os_ioremap(pg_off, pg_sz); + return acpi_os_ioremap(pg_off, pg_sz); } static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) @@ -311,10 +309,9 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) } /** - * __acpi_os_map_iomem - Get a virtual address for a given physical address range. + * acpi_os_map_iomem - Get a virtual address for a given physical address range. * @phys: Start of the physical address range to map. * @size: Size of the physical address range to map. - * @memory: true if remapping memory, false if IO * * Look up the given physical address range in the list of existing ACPI memory * mappings. If found, get a reference to it and return a pointer to it (its @@ -324,8 +321,8 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) * During early init (when acpi_permanent_mmap has not been set yet) this * routine simply calls __acpi_map_table() to get the job done. */ -static void __iomem __ref -*__acpi_os_map_iomem(acpi_physical_address phys, acpi_size size, bool memory) +void __iomem __ref +*acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; void __iomem *virt; @@ -356,7 +353,7 @@ static void __iomem __ref pg_off = round_down(phys, PAGE_SIZE); pg_sz = round_up(phys + size, PAGE_SIZE) - pg_off; - virt = acpi_map(phys, size, memory); + virt = acpi_map(phys, size); if (!virt) { mutex_unlock(&acpi_ioremap_lock); kfree(map); @@ -375,17 +372,11 @@ out: mutex_unlock(&acpi_ioremap_lock); return map->virt + (phys - map->phys); } - -void __iomem *__ref -acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) -{ - return __acpi_os_map_iomem(phys, size, false); -} EXPORT_SYMBOL_GPL(acpi_os_map_iomem); void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) { - return (void *)__acpi_os_map_iomem(phys, size, true); + return (void *)acpi_os_map_iomem(phys, size); } EXPORT_SYMBOL_GPL(acpi_os_map_memory); diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h index a0212e67d6f4..027faa8883aa 100644 --- a/include/acpi/acpi_io.h +++ b/include/acpi/acpi_io.h @@ -14,14 +14,6 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, } #endif -#ifndef acpi_os_memmap -static inline void __iomem *acpi_os_memmap(acpi_physical_address phys, - acpi_size size) -{ - return ioremap_cache(phys, size); -} -#endif - extern bool acpi_permanent_mmap; void __iomem __ref -- cgit v1.2.3-58-ga151 From 0e14ef38669ce4faa80589247fe8ed8a3780f414 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 21 Sep 2021 22:40:26 -0700 Subject: crypto: x86/sm4 - Fix frame pointer stack corruption sm4_aesni_avx_crypt8() sets up the frame pointer (which includes pushing RBP) before doing a conditional sibling call to sm4_aesni_avx_crypt4(), which sets up an additional frame pointer. Things will not go well when sm4_aesni_avx_crypt4() pops only the innermost single frame pointer and then tries to return to the outermost frame pointer. Sibling calls need to occur with an empty stack frame. Do the conditional sibling call *before* setting up the stack pointer. This fixes the following warning: arch/x86/crypto/sm4-aesni-avx-asm_64.o: warning: objtool: sm4_aesni_avx_crypt8()+0x8: sibling call from callable instruction with modified stack frame Fixes: a7ee22ee1445 ("crypto: x86/sm4 - add AES-NI/AVX/x86_64 implementation") Reported-by: kernel test robot Reported-by: Arnd Bergmann Acked-by: Peter Zijlstra (Intel) Reviewed-by: Tianjia Zhang Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sm4-aesni-avx-asm_64.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index fa2c3f50aecb..18d2f5199194 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -367,10 +367,11 @@ SYM_FUNC_START(sm4_aesni_avx_crypt8) * %rdx: src (1..8 blocks) * %rcx: num blocks (1..8) */ - FRAME_BEGIN - cmpq $5, %rcx; jb sm4_aesni_avx_crypt4; + + FRAME_BEGIN + vmovdqu (0 * 16)(%rdx), RA0; vmovdqu (1 * 16)(%rdx), RA1; vmovdqu (2 * 16)(%rdx), RA2; -- cgit v1.2.3-58-ga151 From 5ba1071f7554c4027bdbd712a146111de57918de Mon Sep 17 00:00:00 2001 From: Numfor Mbiziwo-Tiapo Date: Thu, 23 Sep 2021 09:18:43 -0700 Subject: x86/insn, tools/x86: Fix undefined behavior due to potential unaligned accesses Don't perform unaligned loads in __get_next() and __peek_nbyte_next() as these are forms of undefined behavior: "A pointer to an object or incomplete type may be converted to a pointer to a different object or incomplete type. If the resulting pointer is not correctly aligned for the pointed-to type, the behavior is undefined." (from http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf) These problems were identified using the undefined behavior sanitizer (ubsan) with the tools version of the code and perf test. [ bp: Massage commit message. ] Signed-off-by: Numfor Mbiziwo-Tiapo Signed-off-by: Ian Rogers Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20210923161843.751834-1-irogers@google.com --- arch/x86/lib/insn.c | 4 ++-- tools/arch/x86/lib/insn.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 058f19b20465..c565def611e2 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -37,10 +37,10 @@ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index c41f95815480..797699462cd8 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -37,10 +37,10 @@ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) -- cgit v1.2.3-58-ga151 From 4bb0bd81ce5e97092dfda6a106d414b703ec0ee8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2021 17:19:00 +0000 Subject: m68k: Handle arrivals of multiple signals correctly When we have several pending signals, have entered with the kernel with large exception frame *and* have already built at least one sigframe, regs->stkadj is going to be non-zero and regs->format/sr/pc are going to be junk - the real values are in shifted exception stack frame we'd built when putting together the first sigframe. If that happens, subsequent sigframes are going to be garbage. Not hard to fix - just need to find the "adjusted" frame first and look for format/vector/sr/pc in it. Signed-off-by: Al Viro Tested-by: Michael Schmitz Reviewed-by: Michael Schmitz Tested-by: Finn Thain Link: https://lore.kernel.org/r/YP2dBIAPTaVvHiZ6@zeniv-ca.linux.org.uk Signed-off-by: Geert Uytterhoeven --- arch/m68k/kernel/signal.c | 88 ++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 8f215e79e70e..cd11eb101eac 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -447,7 +447,7 @@ static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs) if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) { fpu_version = sc->sc_fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -510,7 +510,7 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs * if (!(CPU_IS_060 || CPU_IS_COLDFIRE)) context_size = fpstate[1]; fpu_version = fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -832,18 +832,24 @@ badframe: return 0; } +static inline struct pt_regs *rte_regs(struct pt_regs *regs) +{ + return (void *)regs + regs->stkadj; +} + static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, unsigned long mask) { + struct pt_regs *tregs = rte_regs(regs); sc->sc_mask = mask; sc->sc_usp = rdusp(); sc->sc_d0 = regs->d0; sc->sc_d1 = regs->d1; sc->sc_a0 = regs->a0; sc->sc_a1 = regs->a1; - sc->sc_sr = regs->sr; - sc->sc_pc = regs->pc; - sc->sc_formatvec = regs->format << 12 | regs->vector; + sc->sc_sr = tregs->sr; + sc->sc_pc = tregs->pc; + sc->sc_formatvec = tregs->format << 12 | tregs->vector; save_a5_state(sc, regs); save_fpu_state(sc, regs); } @@ -851,6 +857,7 @@ static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; + struct pt_regs *tregs = rte_regs(regs); greg_t __user *gregs = uc->uc_mcontext.gregs; int err = 0; @@ -871,9 +878,9 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs * err |= __put_user(sw->a5, &gregs[13]); err |= __put_user(sw->a6, &gregs[14]); err |= __put_user(rdusp(), &gregs[15]); - err |= __put_user(regs->pc, &gregs[16]); - err |= __put_user(regs->sr, &gregs[17]); - err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec); + err |= __put_user(tregs->pc, &gregs[16]); + err |= __put_user(tregs->sr, &gregs[17]); + err |= __put_user((tregs->format << 12) | tregs->vector, &uc->uc_formatvec); err |= rt_save_fpu_state(uc, regs); return err; } @@ -890,13 +897,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); struct sigcontext context; int err = 0, sig = ksig->sig; if (fsize < 0) { pr_debug("setup_frame: Unknown frame format %#x\n", - regs->format); + tregs->format); return -EFAULT; } @@ -907,7 +915,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, err |= __put_user(sig, &frame->sig); - err |= __put_user(regs->vector, &frame->code); + err |= __put_user(tregs->vector, &frame->code); err |= __put_user(&frame->sc, &frame->psc); if (_NSIG_WORDS > 1) @@ -933,34 +941,28 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); + return 0; } @@ -968,7 +970,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); int err = 0, sig = ksig->sig; if (fsize < 0) { @@ -1018,34 +1021,27 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); return 0; } -- cgit v1.2.3-58-ga151 From 50e43a57334400668952f8e551c9d87d3ed2dfef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2021 17:19:45 +0000 Subject: m68k: Update ->thread.esp0 before calling syscall_trace() in ret_from_signal We get there when sigreturn has performed obscene acts on kernel stack; in particular, the location of pt_regs has shifted. We are about to call syscall_trace(), which might stop for tracer. If that happens, we'd better have task_pt_regs() returning correct result... Fucked-up-by: Al Viro Fixes: bd6f56a75bb2 ("m68k: Missing syscall_trace() on sigreturn") Signed-off-by: Al Viro Tested-by: Michael Schmitz Reviewed-by: Michael Schmitz Tested-by: Finn Thain Link: https://lore.kernel.org/r/YP2dMWeV1LkHiOpr@zeniv-ca.linux.org.uk Signed-off-by: Geert Uytterhoeven --- arch/m68k/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 9dd76fbb7c6b..ff9e842cec0f 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -186,6 +186,8 @@ ENTRY(ret_from_signal) movel %curptr@(TASK_STACK),%a1 tstb %a1@(TINFO_FLAGS+2) jge 1f + lea %sp@(SWITCH_STACK_SIZE),%a1 + movel %a1,%curptr@(TASK_THREAD+THREAD_ESP0) jbsr syscall_trace 1: RESTORE_SWITCH_STACK addql #4,%sp -- cgit v1.2.3-58-ga151 From 0d20abde987bed05a8963c8aa4276019d54ff9e7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2021 17:20:13 +0000 Subject: m68k: Leave stack mangling to asm wrapper of sigreturn() sigreturn has to deal with an unpleasant problem - exception stack frames have different sizes, depending upon the exception (and processor model, as well) and variable-sized part of exception frame may contain information needed for instruction restart. So when signal handler terminates and calls sigreturn to resume the execution at the place where we'd been when we caught the signal, it has to rearrange the frame at the bottom of kernel stack. Worse, it might need to open a gap in the kernel stack, shifting pt_regs towards lower addresses. Doing that from C is insane - we'd need to shift stack frames (return addresses, local variables, etc.) of C call chain, right under the nose of compiler and hope it won't fall apart horribly. What had been actually done is only slightly less insane - an inline asm in mangle_kernel_stack() moved the stuff around, then reset stack pointer and jumped to label in asm glue. However, we can avoid all that mess if the asm wrapper we have to use anyway would reserve some space on the stack between switch_stack and the C stack frame of do_{rt_,}sigreturn(). Then C part can simply memmove() pt_regs + switch_stack, memcpy() the variable part of exception frame into the opened gap - all of that without inline asm, buggering C call chain, magical jumps to asm labels, etc. Asm wrapper would need to know where the moved switch_stack has ended up - it might have been shifted into the gap we'd reserved before do_rt_sigreturn() call. That's where it needs to set the stack pointer to. So let the C part return just that and be done with that. While we are at it, the call of berr_040cleanup() we need to do when returning via 68040 bus error exception frame can be moved into C part as well. Signed-off-by: Al Viro Tested-by: Michael Schmitz Reviewed-by: Michael Schmitz Tested-by: Finn Thain Link: https://lore.kernel.org/r/YP2dTQPm1wGPWFgD@zeniv-ca.linux.org.uk Signed-off-by: Geert Uytterhoeven --- arch/m68k/68000/entry.S | 3 -- arch/m68k/coldfire/entry.S | 3 -- arch/m68k/include/asm/traps.h | 4 ++ arch/m68k/kernel/entry.S | 55 ++++++++++----------- arch/m68k/kernel/signal.c | 111 ++++++++++++++++-------------------------- 5 files changed, 71 insertions(+), 105 deletions(-) (limited to 'arch') diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S index 259b3661b614..cce465e850fe 100644 --- a/arch/m68k/68000/entry.S +++ b/arch/m68k/68000/entry.S @@ -25,7 +25,6 @@ .globl system_call .globl resume .globl ret_from_exception -.globl ret_from_signal .globl sys_call_table .globl bad_interrupt .globl inthandler1 @@ -59,8 +58,6 @@ do_trace: subql #4,%sp /* dummy return address */ SAVE_SWITCH_STACK jbsr syscall_trace_leave - -ret_from_signal: RESTORE_SWITCH_STACK addql #4,%sp jra ret_from_exception diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S index d43a02795a4a..68adb7b5b296 100644 --- a/arch/m68k/coldfire/entry.S +++ b/arch/m68k/coldfire/entry.S @@ -51,7 +51,6 @@ sw_usp: .globl system_call .globl resume .globl ret_from_exception -.globl ret_from_signal .globl sys_call_table .globl inthandler @@ -98,8 +97,6 @@ ENTRY(system_call) subql #4,%sp /* dummy return address */ SAVE_SWITCH_STACK jbsr syscall_trace_leave - -ret_from_signal: RESTORE_SWITCH_STACK addql #4,%sp diff --git a/arch/m68k/include/asm/traps.h b/arch/m68k/include/asm/traps.h index 4aff3358fbaf..a9d5c1c870d3 100644 --- a/arch/m68k/include/asm/traps.h +++ b/arch/m68k/include/asm/traps.h @@ -267,6 +267,10 @@ struct frame { } un; }; +#ifdef CONFIG_M68040 +asmlinkage void berr_040cleanup(struct frame *fp); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _M68K_TRAPS_H */ diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index ff9e842cec0f..8fa9822b5922 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -78,20 +78,38 @@ ENTRY(__sys_clone3) ENTRY(sys_sigreturn) SAVE_SWITCH_STACK - movel %sp,%sp@- | switch_stack pointer - pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer + movel %sp,%a1 | switch_stack pointer + lea %sp@(SWITCH_STACK_SIZE),%a0 | pt_regs pointer + lea %sp@(-84),%sp | leave a gap + movel %a1,%sp@- + movel %a0,%sp@- jbsr do_sigreturn - addql #8,%sp - RESTORE_SWITCH_STACK - rts + jra 1f | shared with rt_sigreturn() ENTRY(sys_rt_sigreturn) SAVE_SWITCH_STACK - movel %sp,%sp@- | switch_stack pointer - pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer + movel %sp,%a1 | switch_stack pointer + lea %sp@(SWITCH_STACK_SIZE),%a0 | pt_regs pointer + lea %sp@(-84),%sp | leave a gap + movel %a1,%sp@- + movel %a0,%sp@- + | stack contents: + | [original pt_regs address] [original switch_stack address] + | [gap] [switch_stack] [pt_regs] [exception frame] jbsr do_rt_sigreturn - addql #8,%sp + +1: + | stack contents now: + | [original pt_regs address] [original switch_stack address] + | [unused part of the gap] [moved switch_stack] [moved pt_regs] + | [replacement exception frame] + | return value of do_{rt_,}sigreturn() points to moved switch_stack. + + movel %d0,%sp | discard the leftover junk RESTORE_SWITCH_STACK + | stack contents now is just [syscall return address] [pt_regs] [frame] + | return pt_regs.d0 + movel %sp@(PT_OFF_D0+4),%d0 rts ENTRY(buserr) @@ -182,27 +200,6 @@ do_trace_exit: addql #4,%sp jra .Lret_from_exception -ENTRY(ret_from_signal) - movel %curptr@(TASK_STACK),%a1 - tstb %a1@(TINFO_FLAGS+2) - jge 1f - lea %sp@(SWITCH_STACK_SIZE),%a1 - movel %a1,%curptr@(TASK_THREAD+THREAD_ESP0) - jbsr syscall_trace -1: RESTORE_SWITCH_STACK - addql #4,%sp -/* on 68040 complete pending writebacks if any */ -#ifdef CONFIG_M68040 - bfextu %sp@(PT_OFF_FORMATVEC){#0,#4},%d0 - subql #7,%d0 | bus error frame ? - jbne 1f - movel %sp,%sp@- - jbsr berr_040cleanup - addql #4,%sp -1: -#endif - jra .Lret_from_exception - ENTRY(system_call) SAVE_ALL_SYS diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index cd11eb101eac..338817d0cb3f 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -641,56 +641,35 @@ static inline void siginfo_build_tests(void) static int mangle_kernel_stack(struct pt_regs *regs, int formatvec, void __user *fp) { - int fsize = frame_extra_sizes(formatvec >> 12); - if (fsize < 0) { + int extra = frame_extra_sizes(formatvec >> 12); + char buf[sizeof_field(struct frame, un)]; + + if (extra < 0) { /* * user process trying to return with weird frame format */ pr_debug("user process returning with weird frame format\n"); - return 1; + return -1; } - if (!fsize) { - regs->format = formatvec >> 12; - regs->vector = formatvec & 0xfff; - } else { - struct switch_stack *sw = (struct switch_stack *)regs - 1; - /* yes, twice as much as max(sizeof(frame.un.fmt)) */ - unsigned long buf[sizeof_field(struct frame, un) / 2]; - - /* that'll make sure that expansion won't crap over data */ - if (copy_from_user(buf + fsize / 4, fp, fsize)) - return 1; - - /* point of no return */ - regs->format = formatvec >> 12; - regs->vector = formatvec & 0xfff; -#define frame_offset (sizeof(struct pt_regs)+sizeof(struct switch_stack)) - __asm__ __volatile__ ( -#ifdef CONFIG_COLDFIRE - " movel %0,%/sp\n\t" - " bra ret_from_signal\n" -#else - " movel %0,%/a0\n\t" - " subl %1,%/a0\n\t" /* make room on stack */ - " movel %/a0,%/sp\n\t" /* set stack pointer */ - /* move switch_stack and pt_regs */ - "1: movel %0@+,%/a0@+\n\t" - " dbra %2,1b\n\t" - " lea %/sp@(%c3),%/a0\n\t" /* add offset of fmt */ - " lsrl #2,%1\n\t" - " subql #1,%1\n\t" - /* copy to the gap we'd made */ - "2: movel %4@+,%/a0@+\n\t" - " dbra %1,2b\n\t" - " bral ret_from_signal\n" + if (extra && copy_from_user(buf, fp, extra)) + return -1; + regs->format = formatvec >> 12; + regs->vector = formatvec & 0xfff; + if (extra) { + void *p = (struct switch_stack *)regs - 1; + struct frame *new = (void *)regs - extra; + int size = sizeof(struct pt_regs)+sizeof(struct switch_stack); + + memmove(p - extra, p, size); + memcpy(p - extra + size, buf, extra); + current->thread.esp0 = (unsigned long)&new->ptregs; +#ifdef CONFIG_M68040 + /* on 68040 complete pending writebacks if any */ + if (new->ptregs.format == 7) // bus error frame + berr_040cleanup(new); #endif - : /* no outputs, it doesn't ever return */ - : "a" (sw), "d" (fsize), "d" (frame_offset/4-1), - "n" (frame_offset), "a" (buf + fsize/4) - : "a0"); -#undef frame_offset } - return 0; + return extra; } static inline int @@ -698,7 +677,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u { int formatvec; struct sigcontext context; - int err = 0; siginfo_build_tests(); @@ -707,7 +685,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u /* get previous context */ if (copy_from_user(&context, usc, sizeof(context))) - goto badframe; + return -1; /* restore passed registers */ regs->d0 = context.sc_d0; @@ -720,15 +698,10 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u wrusp(context.sc_usp); formatvec = context.sc_formatvec; - err = restore_fpu_state(&context); - - if (err || mangle_kernel_stack(regs, formatvec, fp)) - goto badframe; - - return 0; + if (restore_fpu_state(&context)) + return -1; -badframe: - return 1; + return mangle_kernel_stack(regs, formatvec, fp); } static inline int @@ -745,7 +718,7 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw, err = __get_user(temp, &uc->uc_mcontext.version); if (temp != MCONTEXT_VERSION) - goto badframe; + return -1; /* restore passed registers */ err |= __get_user(regs->d0, &gregs[0]); err |= __get_user(regs->d1, &gregs[1]); @@ -774,22 +747,17 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw, err |= restore_altstack(&uc->uc_stack); if (err) - goto badframe; - - if (mangle_kernel_stack(regs, temp, &uc->uc_extra)) - goto badframe; + return -1; - return 0; - -badframe: - return 1; + return mangle_kernel_stack(regs, temp, &uc->uc_extra); } -asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw) +asmlinkage void *do_sigreturn(struct pt_regs *regs, struct switch_stack *sw) { unsigned long usp = rdusp(); struct sigframe __user *frame = (struct sigframe __user *)(usp - 4); sigset_t set; + int size; if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -801,20 +769,22 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->sc, frame + 1)) + size = restore_sigcontext(regs, &frame->sc, frame + 1); + if (size < 0) goto badframe; - return regs->d0; + return (void *)sw - size; badframe: force_sig(SIGSEGV); - return 0; + return sw; } -asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) +asmlinkage void *do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) { unsigned long usp = rdusp(); struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4); sigset_t set; + int size; if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -823,13 +793,14 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) set_current_blocked(&set); - if (rt_restore_ucontext(regs, sw, &frame->uc)) + size = rt_restore_ucontext(regs, sw, &frame->uc); + if (size < 0) goto badframe; - return regs->d0; + return (void *)sw - size; badframe: force_sig(SIGSEGV); - return 0; + return sw; } static inline struct pt_regs *rte_regs(struct pt_regs *regs) -- cgit v1.2.3-58-ga151 From 1dc4027bc8b524ed03c4db391cd7910eb4ee19d2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2021 09:04:00 +0200 Subject: m68k: Document that access_ok is broken for !CONFIG_CPU_HAS_ADDRESS_SPACES Document that access_ok is completely broken for coldfire and friends at the moment. Signed-off-by: Christoph Hellwig Reviewed-by: Michael Schmitz Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20210916070405.52750-2-hch@lst.de Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/uaccess.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index f98208ccbbcd..610bfe8d64d5 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -16,6 +16,10 @@ static inline int access_ok(const void __user *addr, unsigned long size) { + /* + * XXX: for !CONFIG_CPU_HAS_ADDRESS_SPACES this really needs to check + * for TASK_SIZE! + */ return 1; } -- cgit v1.2.3-58-ga151 From c4f607c3124e4d2f33604f933b29496ce4111753 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2021 09:04:01 +0200 Subject: m68k: Remove the 030 case in virt_to_phys_slow The 030 case in virt_to_phys_slow can't ever be reached, so remove it. Suggested-by: Michael Schmitz Signed-off-by: Christoph Hellwig Reviewed-by: Michael Schmitz Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20210916070405.52750-3-hch@lst.de Signed-off-by: Geert Uytterhoeven --- arch/m68k/mm/cache.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/m68k/mm/cache.c b/arch/m68k/mm/cache.c index b486c0889eec..e7c1cabbfac4 100644 --- a/arch/m68k/mm/cache.c +++ b/arch/m68k/mm/cache.c @@ -49,24 +49,7 @@ static unsigned long virt_to_phys_slow(unsigned long vaddr) if (mmusr & MMU_R_040) return (mmusr & PAGE_MASK) | (vaddr & ~PAGE_MASK); } else { - unsigned short mmusr; - unsigned long *descaddr; - - asm volatile ("ptestr %3,%2@,#7,%0\n\t" - "pmove %%psr,%1" - : "=a&" (descaddr), "=m" (mmusr) - : "a" (vaddr), "d" (get_fs().seg)); - if (mmusr & (MMU_I|MMU_B|MMU_L)) - return 0; - descaddr = phys_to_virt((unsigned long)descaddr); - switch (mmusr & MMU_NUM) { - case 1: - return (*descaddr & 0xfe000000) | (vaddr & 0x01ffffff); - case 2: - return (*descaddr & 0xfffc0000) | (vaddr & 0x0003ffff); - case 3: - return (*descaddr & PAGE_MASK) | (vaddr & ~PAGE_MASK); - } + WARN_ON_ONCE(!CPU_IS_040_OR_060); } return 0; } -- cgit v1.2.3-58-ga151 From 25d2cae4a5578695f667e868ada38b0b73eb1080 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2021 09:04:02 +0200 Subject: m68k: Use BUILD_BUG for passing invalid sizes to get_user/put_user Simplify the handling a bit by using the common helper instead of referencing undefined symbols. Signed-off-by: Christoph Hellwig Reviewed-by: Michael Schmitz Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20210916070405.52750-4-hch@lst.de Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/uaccess.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index 610bfe8d64d5..01334a9658c4 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -39,9 +39,6 @@ static inline int access_ok(const void __user *addr, #define MOVES "move" #endif -extern int __put_user_bad(void); -extern int __get_user_bad(void); - #define __put_user_asm(res, x, ptr, bwl, reg, err) \ asm volatile ("\n" \ "1: "MOVES"."#bwl" %2,%1\n" \ @@ -105,8 +102,7 @@ asm volatile ("\n" \ break; \ } \ default: \ - __pu_err = __put_user_bad(); \ - break; \ + BUILD_BUG(); \ } \ __pu_err; \ }) @@ -179,8 +175,7 @@ asm volatile ("\n" \ break; \ } \ default: \ - __gu_err = __get_user_bad(); \ - break; \ + BUILD_BUG(); \ } \ __gu_err; \ }) -- cgit v1.2.3-58-ga151 From 01eec1af5ec49b331948ace8f2287580e1594383 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2021 09:04:03 +0200 Subject: m68k: Factor the 8-byte lowlevel {get,put}_user code into helpers Add new helpers for doing the grunt work of the 8-byte {get,put}_user routines to allow for better reuse. Signed-off-by: Christoph Hellwig Reviewed-by: Michael Schmitz Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20210916070405.52750-5-hch@lst.de Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/uaccess.h | 111 ++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index 01334a9658c4..288ef7d11a7a 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -57,6 +57,31 @@ asm volatile ("\n" \ : "+d" (res), "=m" (*(ptr)) \ : #reg (x), "i" (err)) +#define __put_user_asm8(res, x, ptr) \ +do { \ + const void *__pu_ptr = (const void __force *)(ptr); \ + \ + asm volatile ("\n" \ + "1: "MOVES".l %2,(%1)+\n" \ + "2: "MOVES".l %R2,(%1)\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .even\n" \ + "10: movel %3,%0\n" \ + " jra 3b\n" \ + " .previous\n" \ + "\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,10b\n" \ + " .long 2b,10b\n" \ + " .long 3b,10b\n" \ + " .previous" \ + : "+d" (res), "+a" (__pu_ptr) \ + : "r" (x), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. @@ -78,29 +103,8 @@ asm volatile ("\n" \ __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \ break; \ case 8: \ - { \ - const void __user *__pu_ptr = (ptr); \ - asm volatile ("\n" \ - "1: "MOVES".l %2,(%1)+\n" \ - "2: "MOVES".l %R2,(%1)\n" \ - "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .even\n" \ - "10: movel %3,%0\n" \ - " jra 3b\n" \ - " .previous\n" \ - "\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,10b\n" \ - " .long 2b,10b\n" \ - " .long 3b,10b\n" \ - " .previous" \ - : "+d" (__pu_err), "+a" (__pu_ptr) \ - : "r" (__pu_val), "i" (-EFAULT) \ - : "memory"); \ + __put_user_asm8(__pu_err, __pu_val, ptr); \ break; \ - } \ default: \ BUILD_BUG(); \ } \ @@ -130,6 +134,38 @@ asm volatile ("\n" \ (x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val; \ }) +#define __get_user_asm8(res, x, ptr) \ +do { \ + const void *__gu_ptr = (const void __force *)(ptr); \ + union { \ + u64 l; \ + __typeof__(*(ptr)) t; \ + } __gu_val; \ + \ + asm volatile ("\n" \ + "1: "MOVES".l (%2)+,%1\n" \ + "2: "MOVES".l (%2),%R1\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .even\n" \ + "10: move.l %3,%0\n" \ + " sub.l %1,%1\n" \ + " sub.l %R1,%R1\n" \ + " jra 3b\n" \ + " .previous\n" \ + "\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,10b\n" \ + " .long 2b,10b\n" \ + " .previous" \ + : "+d" (res), "=&r" (__gu_val.l), \ + "+a" (__gu_ptr) \ + : "i" (-EFAULT) \ + : "memory"); \ + (x) = __gu_val.t; \ +} while (0) + #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ @@ -144,36 +180,9 @@ asm volatile ("\n" \ case 4: \ __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \ break; \ - case 8: { \ - const void __user *__gu_ptr = (ptr); \ - union { \ - u64 l; \ - __typeof__(*(ptr)) t; \ - } __gu_val; \ - asm volatile ("\n" \ - "1: "MOVES".l (%2)+,%1\n" \ - "2: "MOVES".l (%2),%R1\n" \ - "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .even\n" \ - "10: move.l %3,%0\n" \ - " sub.l %1,%1\n" \ - " sub.l %R1,%R1\n" \ - " jra 3b\n" \ - " .previous\n" \ - "\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,10b\n" \ - " .long 2b,10b\n" \ - " .previous" \ - : "+d" (__gu_err), "=&r" (__gu_val.l), \ - "+a" (__gu_ptr) \ - : "i" (-EFAULT) \ - : "memory"); \ - (x) = __gu_val.t; \ + case 8: \ + __get_user_asm8(__gu_err, x, ptr); \ break; \ - } \ default: \ BUILD_BUG(); \ } \ -- cgit v1.2.3-58-ga151 From 8ade83390930d61c64fe3ab49081990c9d43d0d2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2021 09:04:04 +0200 Subject: m68k: Provide __{get,put}_kernel_nofault Allow non-faulting access to kernel addresses without overriding the address space. Implemented by passing the instruction name to the low-level assembly macros as an argument, and force the use of the normal move instructions for kernel access. Signed-off-by: Christoph Hellwig Reviewed-by: Michael Schmitz Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20210916070405.52750-6-hch@lst.de Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/uaccess.h | 105 ++++++++++++++++++++++++++++++++-------- 1 file changed, 84 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index 288ef7d11a7a..65581a7874d4 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -39,9 +39,9 @@ static inline int access_ok(const void __user *addr, #define MOVES "move" #endif -#define __put_user_asm(res, x, ptr, bwl, reg, err) \ +#define __put_user_asm(inst, res, x, ptr, bwl, reg, err) \ asm volatile ("\n" \ - "1: "MOVES"."#bwl" %2,%1\n" \ + "1: "inst"."#bwl" %2,%1\n" \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .even\n" \ @@ -57,13 +57,13 @@ asm volatile ("\n" \ : "+d" (res), "=m" (*(ptr)) \ : #reg (x), "i" (err)) -#define __put_user_asm8(res, x, ptr) \ +#define __put_user_asm8(inst, res, x, ptr) \ do { \ const void *__pu_ptr = (const void __force *)(ptr); \ \ asm volatile ("\n" \ - "1: "MOVES".l %2,(%1)+\n" \ - "2: "MOVES".l %R2,(%1)\n" \ + "1: "inst".l %2,(%1)+\n" \ + "2: "inst".l %R2,(%1)\n" \ "3:\n" \ " .section .fixup,\"ax\"\n" \ " .even\n" \ @@ -94,16 +94,16 @@ do { \ __chk_user_ptr(ptr); \ switch (sizeof (*(ptr))) { \ case 1: \ - __put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT); \ + __put_user_asm(MOVES, __pu_err, __pu_val, ptr, b, d, -EFAULT); \ break; \ case 2: \ - __put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT); \ + __put_user_asm(MOVES, __pu_err, __pu_val, ptr, w, r, -EFAULT); \ break; \ case 4: \ - __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \ + __put_user_asm(MOVES, __pu_err, __pu_val, ptr, l, r, -EFAULT); \ break; \ case 8: \ - __put_user_asm8(__pu_err, __pu_val, ptr); \ + __put_user_asm8(MOVES, __pu_err, __pu_val, ptr); \ break; \ default: \ BUILD_BUG(); \ @@ -113,10 +113,10 @@ do { \ #define put_user(x, ptr) __put_user(x, ptr) -#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({ \ +#define __get_user_asm(inst, res, x, ptr, type, bwl, reg, err) ({ \ type __gu_val; \ asm volatile ("\n" \ - "1: "MOVES"."#bwl" %2,%1\n" \ + "1: "inst"."#bwl" %2,%1\n" \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .even\n" \ @@ -134,7 +134,7 @@ do { \ (x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val; \ }) -#define __get_user_asm8(res, x, ptr) \ +#define __get_user_asm8(inst, res, x, ptr) \ do { \ const void *__gu_ptr = (const void __force *)(ptr); \ union { \ @@ -143,8 +143,8 @@ do { \ } __gu_val; \ \ asm volatile ("\n" \ - "1: "MOVES".l (%2)+,%1\n" \ - "2: "MOVES".l (%2),%R1\n" \ + "1: "inst".l (%2)+,%1\n" \ + "2: "inst".l (%2),%R1\n" \ "3:\n" \ " .section .fixup,\"ax\"\n" \ " .even\n" \ @@ -172,16 +172,16 @@ do { \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT); \ + __get_user_asm(MOVES, __gu_err, x, ptr, u8, b, d, -EFAULT); \ break; \ case 2: \ - __get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT); \ + __get_user_asm(MOVES, __gu_err, x, ptr, u16, w, r, -EFAULT); \ break; \ case 4: \ - __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \ + __get_user_asm(MOVES, __gu_err, x, ptr, u32, l, r, -EFAULT); \ break; \ case 8: \ - __get_user_asm8(__gu_err, x, ptr); \ + __get_user_asm8(MOVES, __gu_err, x, ptr); \ break; \ default: \ BUILD_BUG(); \ @@ -330,16 +330,19 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) switch (n) { case 1: - __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1); + __put_user_asm(MOVES, res, *(u8 *)from, (u8 __user *)to, + b, d, 1); break; case 2: - __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2); + __put_user_asm(MOVES, res, *(u16 *)from, (u16 __user *)to, + w, r, 2); break; case 3: __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4); + __put_user_asm(MOVES, res, *(u32 *)from, (u32 __user *)to, + l, r, 4); break; case 5: __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,); @@ -388,6 +391,66 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + type *__gk_dst = (type *)(dst); \ + type *__gk_src = (type *)(src); \ + int __gk_err = 0; \ + \ + switch (sizeof(type)) { \ + case 1: \ + __get_user_asm("move", __gk_err, *__gk_dst, __gk_src, \ + u8, b, d, -EFAULT); \ + break; \ + case 2: \ + __get_user_asm("move", __gk_err, *__gk_dst, __gk_src, \ + u16, w, r, -EFAULT); \ + break; \ + case 4: \ + __get_user_asm("move", __gk_err, *__gk_dst, __gk_src, \ + u32, l, r, -EFAULT); \ + break; \ + case 8: \ + __get_user_asm8("move", __gk_err, *__gk_dst, __gk_src); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + if (unlikely(__gk_err)) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + type __pk_src = *(type *)(src); \ + type *__pk_dst = (type *)(dst); \ + int __pk_err = 0; \ + \ + switch (sizeof(type)) { \ + case 1: \ + __put_user_asm("move", __pk_err, __pk_src, __pk_dst, \ + b, d, -EFAULT); \ + break; \ + case 2: \ + __put_user_asm("move", __pk_err, __pk_src, __pk_dst, \ + w, r, -EFAULT); \ + break; \ + case 4: \ + __put_user_asm("move", __pk_err, __pk_src, __pk_dst, \ + l, r, -EFAULT); \ + break; \ + case 8: \ + __put_user_asm8("move", __pk_err, __pk_src, __pk_dst); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + if (unlikely(__pk_err)) \ + goto err_label; \ +} while (0) + #define user_addr_max() \ (uaccess_kernel() ? ~0UL : TASK_SIZE) -- cgit v1.2.3-58-ga151 From 9fde0348640252c79d462c4d29a09a14e8741f5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2021 09:04:05 +0200 Subject: m68k: Remove set_fs() Add a m68k-only set_fc helper to set the SFC and DFC registers for the few places that need to override it for special MM operations, but disconnect that from the deprecated kernel-wide set_fs() API. Note that the SFC/DFC registers are context switched, so there is no need to disable preemption. Partially based on an earlier patch from Linus Torvalds . Signed-off-by: Christoph Hellwig Reviewed-by: Michael Schmitz Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20210916070405.52750-7-hch@lst.de Signed-off-by: Geert Uytterhoeven --- arch/m68k/68000/entry.S | 1 - arch/m68k/Kconfig | 1 - arch/m68k/coldfire/entry.S | 1 - arch/m68k/include/asm/processor.h | 31 +++++++++++++++++-- arch/m68k/include/asm/segment.h | 59 ------------------------------------- arch/m68k/include/asm/thread_info.h | 3 -- arch/m68k/include/asm/tlbflush.h | 11 ++----- arch/m68k/include/asm/uaccess.h | 4 --- arch/m68k/kernel/asm-offsets.c | 2 +- arch/m68k/kernel/entry.S | 5 ++-- arch/m68k/kernel/process.c | 4 +-- arch/m68k/kernel/traps.c | 13 +++----- arch/m68k/mac/misc.c | 1 - arch/m68k/mm/cache.c | 6 ++-- arch/m68k/mm/init.c | 6 ---- arch/m68k/mm/kmap.c | 1 - arch/m68k/mm/memory.c | 1 - arch/m68k/mm/motorola.c | 2 +- arch/m68k/sun3/config.c | 3 +- arch/m68k/sun3/mmu_emu.c | 6 ++-- arch/m68k/sun3/sun3ints.c | 1 - arch/m68k/sun3x/prom.c | 1 - 22 files changed, 46 insertions(+), 117 deletions(-) delete mode 100644 arch/m68k/include/asm/segment.h (limited to 'arch') diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S index cce465e850fe..997b54933015 100644 --- a/arch/m68k/68000/entry.S +++ b/arch/m68k/68000/entry.S @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 774c35f47eea..0b50da08a9c5 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -29,7 +29,6 @@ config M68K select NO_DMA if !MMU && !COLDFIRE select OLD_SIGACTION select OLD_SIGSUSPEND3 - select SET_FS select UACCESS_MEMCPY if !MMU select VIRT_TO_BUS select ZONE_DMA diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S index 68adb7b5b296..9f337c70243a 100644 --- a/arch/m68k/coldfire/entry.S +++ b/arch/m68k/coldfire/entry.S @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index 3750819ac5a1..f4d82c619a5c 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -9,7 +9,6 @@ #define __ASM_M68K_PROCESSOR_H #include -#include #include #include @@ -75,11 +74,37 @@ static inline void wrusp(unsigned long usp) #define TASK_UNMAPPED_BASE 0 #endif +/* Address spaces (or Function Codes in Motorola lingo) */ +#define USER_DATA 1 +#define USER_PROGRAM 2 +#define SUPER_DATA 5 +#define SUPER_PROGRAM 6 +#define CPU_SPACE 7 + +#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES +/* + * Set the SFC/DFC registers for special MM operations. For most normal + * operation these remain set to USER_DATA for the uaccess routines. + */ +static inline void set_fc(unsigned long val) +{ + WARN_ON_ONCE(in_interrupt()); + + __asm__ __volatile__ ("movec %0,%/sfc\n\t" + "movec %0,%/dfc\n\t" + : /* no outputs */ : "r" (val) : "memory"); +} +#else +static inline void set_fc(unsigned long val) +{ +} +#endif /* CONFIG_CPU_HAS_ADDRESS_SPACES */ + struct thread_struct { unsigned long ksp; /* kernel stack pointer */ unsigned long usp; /* user stack pointer */ unsigned short sr; /* saved status register */ - unsigned short fs; /* saved fs (sfc, dfc) */ + unsigned short fc; /* saved fc (sfc, dfc) */ unsigned long crp[2]; /* cpu root pointer */ unsigned long esp0; /* points to SR of stack frame */ unsigned long faddr; /* info about last fault */ @@ -92,7 +117,7 @@ struct thread_struct { #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) init_stack, \ .sr = PS_S, \ - .fs = __KERNEL_DS, \ + .fc = USER_DATA, \ } /* diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h deleted file mode 100644 index 2b5e68a71ef7..000000000000 --- a/arch/m68k/include/asm/segment.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _M68K_SEGMENT_H -#define _M68K_SEGMENT_H - -/* define constants */ -/* Address spaces (FC0-FC2) */ -#define USER_DATA (1) -#ifndef __USER_DS -#define __USER_DS (USER_DATA) -#endif -#define USER_PROGRAM (2) -#define SUPER_DATA (5) -#ifndef __KERNEL_DS -#define __KERNEL_DS (SUPER_DATA) -#endif -#define SUPER_PROGRAM (6) -#define CPU_SPACE (7) - -#ifndef __ASSEMBLY__ - -typedef struct { - unsigned long seg; -} mm_segment_t; - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES -/* - * Get/set the SFC/DFC registers for MOVES instructions - */ -#define USER_DS MAKE_MM_SEG(__USER_DS) -#define KERNEL_DS MAKE_MM_SEG(__KERNEL_DS) - -static inline mm_segment_t get_fs(void) -{ - mm_segment_t _v; - __asm__ ("movec %/dfc,%0":"=r" (_v.seg):); - return _v; -} - -static inline void set_fs(mm_segment_t val) -{ - __asm__ __volatile__ ("movec %0,%/sfc\n\t" - "movec %0,%/dfc\n\t" - : /* no outputs */ : "r" (val.seg) : "memory"); -} - -#else -#define USER_DS MAKE_MM_SEG(TASK_SIZE) -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) -#endif - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) - -#endif /* __ASSEMBLY__ */ - -#endif /* _M68K_SEGMENT_H */ diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 15a757073fa5..c952658ba792 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -4,7 +4,6 @@ #include #include -#include /* * On machines with 4k pages we default to an 8k thread size, though we @@ -27,7 +26,6 @@ struct thread_info { struct task_struct *task; /* main task structure */ unsigned long flags; - mm_segment_t addr_limit; /* thread address space */ int preempt_count; /* 0 => preemptable, <0 => BUG */ __u32 cpu; /* should always be 0 on m68k */ unsigned long tp_value; /* thread pointer */ @@ -37,7 +35,6 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ - .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ } diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h index a6318ccd308f..b882e2f4f551 100644 --- a/arch/m68k/include/asm/tlbflush.h +++ b/arch/m68k/include/asm/tlbflush.h @@ -13,13 +13,12 @@ static inline void flush_tlb_kernel_page(void *addr) if (CPU_IS_COLDFIRE) { mmu_write(MMUOR, MMUOR_CNL); } else if (CPU_IS_040_OR_060) { - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); + set_fc(SUPER_DATA); __asm__ __volatile__(".chip 68040\n\t" "pflush (%0)\n\t" ".chip 68k" : : "a" (addr)); - set_fs(old_fs); + set_fc(USER_DATA); } else if (CPU_IS_020_OR_030) __asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr)); } @@ -84,12 +83,8 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - if (vma->vm_mm == current->active_mm) { - mm_segment_t old_fs = force_uaccess_begin(); - + if (vma->vm_mm == current->active_mm) __flush_tlb_one(addr); - force_uaccess_end(old_fs); - } } static inline void flush_tlb_range(struct vm_area_struct *vma, diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index 65581a7874d4..ba670523885c 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -9,7 +9,6 @@ */ #include #include -#include #include /* We let the MMU do all checking */ @@ -451,9 +450,6 @@ do { \ goto err_label; \ } while (0) -#define user_addr_max() \ - (uaccess_kernel() ? ~0UL : TASK_SIZE) - extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index ccea355052ef..906d73230537 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -31,7 +31,7 @@ int main(void) DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); DEFINE(THREAD_USP, offsetof(struct thread_struct, usp)); DEFINE(THREAD_SR, offsetof(struct thread_struct, sr)); - DEFINE(THREAD_FS, offsetof(struct thread_struct, fs)); + DEFINE(THREAD_FC, offsetof(struct thread_struct, fc)); DEFINE(THREAD_CRP, offsetof(struct thread_struct, crp)); DEFINE(THREAD_ESP0, offsetof(struct thread_struct, esp0)); DEFINE(THREAD_FPREG, offsetof(struct thread_struct, fp)); diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 8fa9822b5922..9434fca68de5 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -337,7 +336,7 @@ resume: /* save fs (sfc,%dfc) (may be pointing to kernel memory) */ movec %sfc,%d0 - movew %d0,%a0@(TASK_THREAD+THREAD_FS) + movew %d0,%a0@(TASK_THREAD+THREAD_FC) /* save usp */ /* it is better to use a movel here instead of a movew 8*) */ @@ -423,7 +422,7 @@ resume: movel %a0,%usp /* restore fs (sfc,%dfc) */ - movew %a1@(TASK_THREAD+THREAD_FS),%a0 + movew %a1@(TASK_THREAD+THREAD_FC),%a0 movec %a0,%sfc movec %a0,%dfc diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index db49f9091711..1ab692b952cd 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -92,7 +92,7 @@ void show_regs(struct pt_regs * regs) void flush_thread(void) { - current->thread.fs = __USER_DS; + current->thread.fc = USER_DATA; #ifdef CONFIG_FPU if (!FPU_IS_EMU) { unsigned long zero = 0; @@ -155,7 +155,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, * Must save the current SFC/DFC value, NOT the value when * the parent was last descheduled - RGH 10-08-96 */ - p->thread.fs = get_fs().seg; + p->thread.fc = USER_DATA; if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 5b19fcdcd69e..9718ce94cc84 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -181,9 +181,8 @@ static inline void access_error060 (struct frame *fp) static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs) { unsigned long mmusr; - mm_segment_t old_fs = get_fs(); - set_fs(MAKE_MM_SEG(wbs)); + set_fc(wbs); if (iswrite) asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr)); @@ -192,7 +191,7 @@ static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs) asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr)); - set_fs(old_fs); + set_fc(USER_DATA); return mmusr; } @@ -201,10 +200,8 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba, unsigned long wbd) { int res = 0; - mm_segment_t old_fs = get_fs(); - /* set_fs can not be moved, otherwise put_user() may oops */ - set_fs(MAKE_MM_SEG(wbs)); + set_fc(wbs); switch (wbs & WBSIZ_040) { case BA_SIZE_BYTE: @@ -218,9 +215,7 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba, break; } - /* set_fs can not be moved, otherwise put_user() may oops */ - set_fs(old_fs); - + set_fc(USER_DATA); pr_debug("do_040writeback1, res=%d\n", res); diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 90f4e9ca1276..4fab34791758 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/mm/cache.c b/arch/m68k/mm/cache.c index e7c1cabbfac4..dde978e66f14 100644 --- a/arch/m68k/mm/cache.c +++ b/arch/m68k/mm/cache.c @@ -90,11 +90,9 @@ void flush_icache_user_range(unsigned long address, unsigned long endaddr) void flush_icache_range(unsigned long address, unsigned long endaddr) { - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); + set_fc(SUPER_DATA); flush_icache_user_range(address, endaddr); - set_fs(old_fs); + set_fc(USER_DATA); } EXPORT_SYMBOL(flush_icache_range); diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 5d749e188246..1b47bec15832 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -72,12 +72,6 @@ void __init paging_init(void) if (!empty_zero_page) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); - - /* - * Set up SFC/DFC registers (user data space). - */ - set_fs (USER_DS); - max_zone_pfn[ZONE_DMA] = end_mem >> PAGE_SHIFT; free_area_init(max_zone_pfn); } diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c index 1269d513b221..20ddf71b43d0 100644 --- a/arch/m68k/mm/kmap.c +++ b/arch/m68k/mm/kmap.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c index fe75aecfb238..c2c03b0a1567 100644 --- a/arch/m68k/mm/memory.c +++ b/arch/m68k/mm/memory.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 3a653f0a4188..9f3f77785aa7 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -467,7 +467,7 @@ void __init paging_init(void) /* * Set up SFC/DFC registers */ - set_fs(KERNEL_DS); + set_fc(USER_DATA); #ifdef DEBUG printk ("before free_area_init\n"); diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index f7dd47232b6c..203f428a0344 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -31,7 +31,6 @@ #include #include #include -#include #include char sun3_reserved_pmeg[SUN3_PMEGS_NUM]; @@ -89,7 +88,7 @@ void __init sun3_init(void) sun3_reserved_pmeg[249] = 1; sun3_reserved_pmeg[252] = 1; sun3_reserved_pmeg[253] = 1; - set_fs(KERNEL_DS); + set_fc(USER_DATA); } /* Without this, Bad Things happen when something calls arch_reset. */ diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index 7aa879b7c7ff..7ec20817c0c9 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -191,14 +190,13 @@ void __init mmu_emu_init(unsigned long bootmem_end) for(seg = 0; seg < PAGE_OFFSET; seg += SUN3_PMEG_SIZE) sun3_put_segmap(seg, SUN3_INVALID_PMEG); - set_fs(MAKE_MM_SEG(3)); + set_fc(3); for(seg = 0; seg < 0x10000000; seg += SUN3_PMEG_SIZE) { i = sun3_get_segmap(seg); for(j = 1; j < CONTEXTS_NUM; j++) (*(romvec->pv_setctxt))(j, (void *)seg, i); } - set_fs(KERNEL_DS); - + set_fc(USER_DATA); } /* erase the mappings for a dead context. Uses the pg_dir for hints diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index 41ae422119d3..36cc280a4505 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/sun3x/prom.c b/arch/m68k/sun3x/prom.c index 74d2fe57524b..64c23bfaa90c 100644 --- a/arch/m68k/sun3x/prom.c +++ b/arch/m68k/sun3x/prom.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-58-ga151 From adfc8f9d2f9fefd880abc82cfbf62cbfe6539c97 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 23 Sep 2021 17:29:39 -0700 Subject: NIOS2: fix kconfig unmet dependency warning for SERIAL_CORE_CONSOLE SERIAL_CORE_CONSOLE depends on TTY so EARLY_PRINTK should also depend on TTY so that it does not select SERIAL_CORE_CONSOLE inadvertently. WARNING: unmet direct dependencies detected for SERIAL_CORE_CONSOLE Depends on [n]: TTY [=n] && HAS_IOMEM [=y] Selected by [y]: - EARLY_PRINTK [=y] Fixes: e8bf5bc776ed ("nios2: add early printk support") Signed-off-by: Randy Dunlap Cc: Dinh Nguyen Signed-off-by: Dinh Nguyen --- arch/nios2/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug index a8bc06e96ef5..ca1beb87f987 100644 --- a/arch/nios2/Kconfig.debug +++ b/arch/nios2/Kconfig.debug @@ -3,9 +3,10 @@ config EARLY_PRINTK bool "Activate early kernel debugging" default y + depends on TTY select SERIAL_CORE_CONSOLE help - Enable early printk on console + Enable early printk on console. This is useful for kernel debugging when your machine crashes very early before the console code is initialized. You should normally say N here, unless you want to debug such a crash. -- cgit v1.2.3-58-ga151 From e8e9f1e6327005be9656aa135aeb9dfdaf6b3032 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Sep 2021 15:43:57 -0700 Subject: sh: pgtable-3level: fix cast to pointer from integer of different size If X2TLB=y (CPU_SHX2=y or CPU_SHX3=y, e.g. migor_defconfig), pgd_t.pgd is "unsigned long long", causing: In file included from arch/sh/include/asm/pgtable.h:13, from include/linux/pgtable.h:6, from include/linux/mm.h:33, from arch/sh/kernel/asm-offsets.c:14: arch/sh/include/asm/pgtable-3level.h: In function `pud_pgtable': arch/sh/include/asm/pgtable-3level.h:37:9: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 37 | return (pmd_t *)pud_val(pud); | ^ Fix this by adding an intermediate cast to "unsigned long", which is basically what the old code did before. Link: https://lkml.kernel.org/r/2c2eef3c9a2f57e5609100a4864715ccf253d30f.1631713483.git.geert+renesas@glider.be Fixes: 9cf6fa2458443118 ("mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t *") Signed-off-by: Geert Uytterhoeven Tested-by: Daniel Palmer Acked-by: Rob Landley Cc: Yoshinori Sato Cc: Rich Felker Cc: "Aneesh Kumar K . V" Cc: Jacopo Mondi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 56bf35c2f29c..cdced80a7ffa 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -34,7 +34,7 @@ typedef struct { unsigned long long pmd; } pmd_t; static inline pmd_t *pud_pgtable(pud_t pud) { - return (pmd_t *)pud_val(pud); + return (pmd_t *)(unsigned long)pud_val(pud); } /* only used by the stubbed out hugetlb gup code, should never be called */ -- cgit v1.2.3-58-ga151 From 9523b33cc31cf8ce703f8facee9fd16cba36d5ad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 Sep 2021 14:05:25 -0700 Subject: NIOS2: setup.c: drop unused variable 'dram_start' This is a nuisance when CONFIG_WERROR is set, so drop the variable declaration since the code that used it was removed. ../arch/nios2/kernel/setup.c: In function 'setup_arch': ../arch/nios2/kernel/setup.c:152:13: warning: unused variable 'dram_start' [-Wunused-variable] 152 | int dram_start; Fixes: 7f7bc20bc41a ("nios2: Don't use _end for calculating min_low_pfn") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Reviewed-by: Mike Rapoport Cc: Andreas Oetken Signed-off-by: Dinh Nguyen --- arch/nios2/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index cf8d687a2644..40bc8fb75e0b 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -149,8 +149,6 @@ static void __init find_limits(unsigned long *min, unsigned long *max_low, void __init setup_arch(char **cmdline_p) { - int dram_start; - console_verbose(); memory_start = memblock_start_of_DRAM(); -- cgit v1.2.3-58-ga151 From ced185824c89b60e65b5a2606954c098320cdfb8 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 27 Sep 2021 13:11:57 +0000 Subject: bpf, x86: Fix bpf mapping of atomic fetch implementation Fix the case where the dst register maps to %rax as otherwise this produces an incorrect mapping with the implementation in 981f94c3e921 ("bpf: Add bitwise atomic instructions") as %rax is clobbered given it's part of the cmpxchg as operand. The issue is similar to b29dd96b905f ("bpf, x86: Fix BPF_FETCH atomic and/or/ xor with r0 as src") just that the case of dst register was missed. Before, dst=r0 (%rax) src=r2 (%rsi): [...] c5: mov %rax,%r10 c8: mov 0x0(%rax),%rax <---+ (broken) cc: mov %rax,%r11 | cf: and %rsi,%r11 | d2: lock cmpxchg %r11,0x0(%rax) <---+ d8: jne 0x00000000000000c8 | da: mov %rax,%rsi | dd: mov %r10,%rax | [...] | | After, dst=r0 (%rax) src=r2 (%rsi): | | [...] | da: mov %rax,%r10 | dd: mov 0x0(%r10),%rax <---+ (fixed) e1: mov %rax,%r11 | e4: and %rsi,%r11 | e7: lock cmpxchg %r11,0x0(%r10) <---+ ed: jne 0x00000000000000dd ef: mov %rax,%rsi f2: mov %r10,%rax [...] The remaining combinations were fine as-is though: After, dst=r9 (%r15) src=r0 (%rax): [...] dc: mov %rax,%r10 df: mov 0x0(%r15),%rax e3: mov %rax,%r11 e6: and %r10,%r11 e9: lock cmpxchg %r11,0x0(%r15) ef: jne 0x00000000000000df _ f1: mov %rax,%r10 | (unneeded, but f4: mov %r10,%rax _| not a problem) [...] After, dst=r9 (%r15) src=r4 (%rcx): [...] de: mov %rax,%r10 e1: mov 0x0(%r15),%rax e5: mov %rax,%r11 e8: and %rcx,%r11 eb: lock cmpxchg %r11,0x0(%r15) f1: jne 0x00000000000000e1 f3: mov %rax,%rcx f6: mov %r10,%rax [...] The case of dst == src register is rejected by the verifier and therefore not supported, but x86 JIT also handles this case just fine. After, dst=r0 (%rax) src=r0 (%rax): [...] eb: mov %rax,%r10 ee: mov 0x0(%r10),%rax f2: mov %rax,%r11 f5: and %r10,%r11 f8: lock cmpxchg %r11,0x0(%r10) fe: jne 0x00000000000000ee 100: mov %rax,%r10 103: mov %r10,%rax [...] Fixes: 981f94c3e921 ("bpf: Add bitwise atomic instructions") Reported-by: Johan Almbladh Signed-off-by: Johan Almbladh Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Acked-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d24a512fd6f3..9ea57389c554 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1341,9 +1341,10 @@ st: if (is_imm8(insn->off)) if (insn->imm == (BPF_AND | BPF_FETCH) || insn->imm == (BPF_OR | BPF_FETCH) || insn->imm == (BPF_XOR | BPF_FETCH)) { - u8 *branch_target; bool is64 = BPF_SIZE(insn->code) == BPF_DW; u32 real_src_reg = src_reg; + u32 real_dst_reg = dst_reg; + u8 *branch_target; /* * Can't be implemented with a single x86 insn. @@ -1354,11 +1355,13 @@ st: if (is_imm8(insn->off)) emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); if (src_reg == BPF_REG_0) real_src_reg = BPF_REG_AX; + if (dst_reg == BPF_REG_0) + real_dst_reg = BPF_REG_AX; branch_target = prog; /* Load old value */ emit_ldx(&prog, BPF_SIZE(insn->code), - BPF_REG_0, dst_reg, insn->off); + BPF_REG_0, real_dst_reg, insn->off); /* * Perform the (commutative) operation locally, * put the result in the AUX_REG. @@ -1369,7 +1372,8 @@ st: if (is_imm8(insn->off)) add_2reg(0xC0, AUX_REG, real_src_reg)); /* Attempt to swap in new value */ err = emit_atomic(&prog, BPF_CMPXCHG, - dst_reg, AUX_REG, insn->off, + real_dst_reg, AUX_REG, + insn->off, BPF_SIZE(insn->code)); if (WARN_ON(err)) return err; @@ -1383,11 +1387,10 @@ st: if (is_imm8(insn->off)) /* Restore R0 after clobbering RAX */ emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); break; - } err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, - insn->off, BPF_SIZE(insn->code)); + insn->off, BPF_SIZE(insn->code)); if (err) return err; break; -- cgit v1.2.3-58-ga151 From 02d029a41dc986e2d5a77ecca45803857b346829 Mon Sep 17 00:00:00 2001 From: Anand K Mistry Date: Wed, 29 Sep 2021 17:04:21 +1000 Subject: perf/x86: Reset destroy callback on event init failure perf_init_event tries multiple init callbacks and does not reset the event state between tries. When x86_pmu_event_init runs, it unconditionally sets the destroy callback to hw_perf_event_destroy. On the next init attempt after x86_pmu_event_init, in perf_try_init_event, if the pmu's capabilities includes PERF_PMU_CAP_NO_EXCLUDE, the destroy callback will be run. However, if the next init didn't set the destroy callback, hw_perf_event_destroy will be run (since the callback wasn't reset). Looking at other pmu init functions, the common pattern is to only set the destroy callback on a successful init. Resetting the callback on failure tries to replicate that pattern. This was discovered after commit f11dd0d80555 ("perf/x86/amd/ibs: Extend PERF_PMU_CAP_NO_EXCLUDE to IBS Op") when the second (and only second) run of the perf tool after a reboot results in 0 samples being generated. The extra run of hw_perf_event_destroy results in active_events having an extra decrement on each perf run. The second run has active_events == 0 and every subsequent run has active_events < 0. When active_events == 0, the NMI handler will early-out and not record any samples. Signed-off-by: Anand K Mistry Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210929170405.1.I078b98ee7727f9ae9d6df8262bad7e325e40faf0@changeid --- arch/x86/events/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2a57dbed4894..6dfa8ddaa60f 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2465,6 +2465,7 @@ static int x86_pmu_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); + event->destroy = NULL; } if (READ_ONCE(x86_pmu.attr_rdpmc) && -- cgit v1.2.3-58-ga151 From ecc2123e09f9e71ddc6c53d71e283b8ada685fe2 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 Sep 2021 08:19:03 -0700 Subject: perf/x86/intel: Update event constraints for ICX According to the latest event list, the event encoding 0xEF is only available on the first 4 counters. Add it into the event constraints table. Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1632842343-25862-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7011e87be6d0..9a044438072b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -263,6 +263,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT(0xef, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), EVENT_CONSTRAINT_END }; -- cgit v1.2.3-58-ga151 From 291073a566b2094c7192872cc0f17ce73d83cb76 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Oct 2021 13:34:19 -0700 Subject: kvm: fix objtool relocation warning The recent change to make objtool aware of more symbol relocation types (commit 24ff65257375: "objtool: Teach get_alt_entry() about more relocation types") also added another check, and resulted in this objtool warning when building kvm on x86: arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception The reason seems to be that kvm_fastop_exception() is marked as a global symbol, which causes the relocation to ke kept around for objtool. And at the same time, the kvm_fastop_exception definition (which is done as an inline asm statement) doesn't actually set the type of the global, which then makes objtool unhappy. The minimal fix is to just not mark kvm_fastop_exception as being a global symbol. It's only used in that one compilation unit anyway, so it was always pointless. That's how all the other local exception table labels are done. I'm not entirely happy about the kinds of games that the kvm code plays with doing its own exception handling, and the fact that it confused objtool is most definitely a symptom of the code being a bit too subtle and ad-hoc. But at least this trivial one-liner makes objtool no longer upset about what is going on. Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types") Link: https://lore.kernel.org/lkml/CAHk-=wiZwq-0LknKhXN4M+T8jbxn_2i9mcKpO+OaBSSq_Eh7tg@mail.gmail.com/ Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Jim Mattson Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Nathan Chancellor Signed-off-by: Linus Torvalds --- arch/x86/kvm/emulate.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c589ac832265..9a144ca8e146 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -435,7 +435,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); __FOP_RET(#op) asm(".pushsection .fixup, \"ax\"\n" - ".global kvm_fastop_exception \n" "kvm_fastop_exception: xor %esi, %esi; ret\n" ".popsection"); -- cgit v1.2.3-58-ga151 From a78738ed1d9bf40d09109599b884508c69d188b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 8 Oct 2021 14:58:34 +0100 Subject: KVM: arm64: Turn __KVM_HOST_SMCCC_FUNC_* into an enum (mostly) __KVM_HOST_SMCCC_FUNC_* is a royal pain, as there is a fair amount of churn around these #defines, and we avoid making it an enum only for the sake of the early init, low level code that requires __KVM_HOST_SMCCC_FUNC___kvm_hyp_init to be usable from assembly. Let's be brave and turn everything but this symbol into an enum, using a bit of arithmetic to avoid any overlap. Acked-by: Will Deacon Link: https://lore.kernel.org/r/877depq9gw.wl-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20211008135839.1193-2-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 44 ++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..43b5e213ae43 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -44,31 +44,35 @@ #define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name) #define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 -#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1 -#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 -#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5 -#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 -#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 -#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 -#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 -#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16 -#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 -#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 -#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 -#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 #ifndef __ASSEMBLY__ #include +enum __kvm_host_smccc_func { + /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ + __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, + __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, + __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, + __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, + __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, + __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, + __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, + __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, +}; + #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] #define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] -- cgit v1.2.3-58-ga151 From 8f4566f18db5d1257fc2d5442e16274424a529c1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:35 +0100 Subject: arm64: Prevent kexec and hibernation if is_protected_kvm_enabled() When pKVM is enabled, the hypervisor code at EL2 and its data structures are inaccessible to the host kernel and cannot be torn down or replaced as this would defeat the integrity properies which pKVM aims to provide. Furthermore, the ABI between the host and EL2 is flexible and private to whatever the current implementation of KVM requires and so booting a new kernel with an old EL2 component is very likely to end in disaster. In preparation for uninstalling the hyp stub calls which are relied upon to reset EL2, disable kexec and hibernation in the host when protected KVM is enabled. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-3-will@kernel.org --- arch/arm64/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6f6ff072acbd..44369b99a57e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1128,5 +1128,6 @@ bool cpus_are_stuck_in_kernel(void) { bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); - return !!cpus_stuck_in_kernel || smp_spin_tables; + return !!cpus_stuck_in_kernel || smp_spin_tables || + is_protected_kvm_enabled(); } -- cgit v1.2.3-58-ga151 From 8579a185bacaa64c65e43e251ceede2f7600f7e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:36 +0100 Subject: KVM: arm64: Reject stub hypercalls after pKVM has been initialised The stub hypercalls provide mechanisms to reset and replace the EL2 code, so uninstall them once pKVM has been initialised in order to ensure the integrity of the hypervisor code. To ensure pKVM initialisation remains functional, split cpu_hyp_reinit() into two helper functions to separate usage of the stub from usage of pkvm hypercalls either side of __pkvm_init on the boot CPU. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-4-will@kernel.org --- arch/arm64/kvm/arm.c | 31 +++++++++++++++++++++++-------- arch/arm64/kvm/hyp/nvhe/host.S | 26 +++++++++++++++++--------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..9506cf88fa0e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1579,25 +1579,33 @@ static void cpu_set_hyp_vector(void) kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); } -static void cpu_hyp_reinit(void) +static void cpu_hyp_init_context(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); - cpu_hyp_reset(); - - if (is_kernel_in_hyp_mode()) - kvm_timer_init_vhe(); - else + if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); +} +static void cpu_hyp_init_features(void) +{ cpu_set_hyp_vector(); - kvm_arm_init_debug(); + if (is_kernel_in_hyp_mode()) + kvm_timer_init_vhe(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } +static void cpu_hyp_reinit(void) +{ + cpu_hyp_reset(); + cpu_hyp_init_context(); + cpu_hyp_init_features(); +} + static void _kvm_arch_hardware_enable(void *discard) { if (!__this_cpu_read(kvm_arm_hardware_enabled)) { @@ -1788,10 +1796,17 @@ static int do_pkvm_init(u32 hyp_va_bits) int ret; preempt_disable(); - hyp_install_host_vector(); + cpu_hyp_init_context(); ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, num_possible_cpus(), kern_hyp_va(per_cpu_base), hyp_va_bits); + cpu_hyp_init_features(); + + /* + * The stub hypercalls are now disabled, so set our local flag to + * prevent a later re-init attempt in kvm_arch_hardware_enable(). + */ + __this_cpu_write(kvm_arm_hardware_enabled, 1); preempt_enable(); return ret; diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 4b652ffb591d..0c6116d34e18 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic) b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) -.macro host_el1_sync_vect - .align 7 -.L__vect_start\@: - stp x0, x1, [sp, #-16]! - mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT - cmp x0, #ESR_ELx_EC_HVC64 - b.ne __host_exit - +SYM_FUNC_START(__host_hvc) ldp x0, x1, [sp] // Don't fixup the stack yet + /* No stub for you, sonny Jim */ +alternative_if ARM64_KVM_PROTECTED_MODE + b __host_exit +alternative_else_nop_endif + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit @@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic) ldr x5, =__kvm_handle_stub_hvc hyp_pa x5, x6 br x5 +SYM_FUNC_END(__host_hvc) + +.macro host_el1_sync_vect + .align 7 +.L__vect_start\@: + stp x0, x1, [sp, #-16]! + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT + cmp x0, #ESR_ELx_EC_HVC64 + b.eq __host_hvc + b __host_exit .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) .error "host_el1_sync_vect larger than vector entry" -- cgit v1.2.3-58-ga151 From 2f2e1a5069679491d18cf9021da19b40c56a17f3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:37 +0100 Subject: KVM: arm64: Propagate errors from __pkvm_prot_finalize hypercall If the __pkvm_prot_finalize hypercall returns an error, we WARN but fail to propagate the failure code back to kvm_arch_init(). Pass a pointer to a zero-initialised return variable so that failure to finalise the pKVM protections on a host CPU can be reported back to KVM. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-5-will@kernel.org --- arch/arm64/kvm/arm.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9506cf88fa0e..13bbf35896cd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1986,9 +1986,25 @@ out_err: return err; } -static void _kvm_host_prot_finalize(void *discard) +static void _kvm_host_prot_finalize(void *arg) { - WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); + int *err = arg; + + if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) + WRITE_ONCE(*err, -EINVAL); +} + +static int pkvm_drop_host_privileges(void) +{ + int ret = 0; + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, &ret, 1); + return ret; } static int finalize_hyp_mode(void) @@ -2002,15 +2018,7 @@ static int finalize_hyp_mode(void) * None of other sections should ever be introspected. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); - - /* - * Flip the static key upfront as that may no longer be possible - * once the host stage 2 is installed. - */ - static_branch_enable(&kvm_protected_mode_initialized); - on_each_cpu(_kvm_host_prot_finalize, NULL, 1); - - return 0; + return pkvm_drop_host_privileges(); } struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) -- cgit v1.2.3-58-ga151 From 07036cffe17ec07e8fb630d86f8ea21832d9e57d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:38 +0100 Subject: KVM: arm64: Prevent re-finalisation of pKVM for a given CPU __pkvm_prot_finalize() completes the deprivilege of the host when pKVM is in use by installing a stage-2 translation table for the calling CPU. Issuing the hypercall multiple times for a given CPU makes little sense, but in such a case just return early with -EPERM rather than go through the whole page-table dance again. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-6-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eac..cafe17e5fa8f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -123,6 +123,9 @@ int __pkvm_prot_finalize(void) struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); + if (params->hcr_el2 & HCR_VM) + return -EPERM; + params->vttbr = kvm_get_vttbr(mmu); params->vtcr = host_kvm.arch.vtcr; params->hcr_el2 |= HCR_VM; -- cgit v1.2.3-58-ga151 From 057bed206f70d624c2eacb43ec56551950a26832 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:39 +0100 Subject: KVM: arm64: Disable privileged hypercalls after pKVM finalisation After pKVM has been 'finalised' using the __pkvm_prot_finalize hypercall, the calling CPU will have a Stage-2 translation enabled to prevent access to memory pages owned by EL2. Although this forms a significant part of the process to deprivilege the host kernel, we also need to ensure that the hypercall interface is reduced so that the EL2 code cannot, for example, be re-initialised using a new set of vectors. Re-order the hypercalls so that only a suffix remains available after finalisation of pKVM. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-7-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 25 ++++++++++++++----------- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 37 ++++++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 43b5e213ae43..4654d27fd221 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -50,27 +50,30 @@ #include enum __kvm_host_smccc_func { + /* Hypercalls available only prior to pKVM finalisation */ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ - __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, + __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, + __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, + + /* Hypercalls available after pKVM finalisation */ + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, + __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, + __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, - __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, - __KVM_HOST_SMCCC_FUNC___pkvm_init, - __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, - __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, - __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, - __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, - __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2da6aa8da868..8566805ef62c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -165,36 +165,51 @@ typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x static const hcall_t host_hcall[] = { - HANDLE_FUNC(__kvm_vcpu_run), + /* ___kvm_hyp_init */ + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__pkvm_init), + HANDLE_FUNC(__pkvm_create_private_mapping), + HANDLE_FUNC(__pkvm_cpu_set_vector), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__vgic_v3_get_gic_config), + HANDLE_FUNC(__pkvm_prot_finalize), + + HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__kvm_adjust_pc), + HANDLE_FUNC(__kvm_vcpu_run), HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__kvm_enable_ssbs), - HANDLE_FUNC(__vgic_v3_get_gic_config), HANDLE_FUNC(__vgic_v3_read_vmcr), HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_init_lrs), - HANDLE_FUNC(__kvm_get_mdcr_el2), HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), - HANDLE_FUNC(__pkvm_init), - HANDLE_FUNC(__pkvm_cpu_set_vector), - HANDLE_FUNC(__pkvm_host_share_hyp), - HANDLE_FUNC(__pkvm_create_private_mapping), - HANDLE_FUNC(__pkvm_prot_finalize), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(unsigned long, id, host_ctxt, 0); + unsigned long hcall_min = 0; hcall_t hfn; + /* + * If pKVM has been initialised then reject any calls to the + * early "privileged" hypercalls. Note that we cannot reject + * calls to __pkvm_prot_finalize for two reasons: (1) The static + * key used to determine initialisation must be toggled prior to + * finalisation and (2) finalisation is performed on a per-CPU + * basis. This is all fine, however, since __pkvm_prot_finalize + * returns -EPERM after the first call for a given CPU. + */ + if (static_branch_unlikely(&kvm_protected_mode_initialized)) + hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id -= KVM_HOST_SMCCC_ID(0); - if (unlikely(id >= ARRAY_SIZE(host_hcall))) + if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) goto inval; hfn = host_hcall[id]; -- cgit v1.2.3-58-ga151 From f25c5e4dafd859b941a4654cbab9eb83ff994bcd Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:11 -0700 Subject: kvm: arm64: vgic: Introduce vgic_check_iorange Add the new vgic_check_iorange helper that checks that an iorange is sane: the start address and size have valid alignments, the range is within the addressable PA range, start+size doesn't overflow, and the start wasn't already defined. No functional change. Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-2-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.h | 4 ++++ 2 files changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 7740995de982..cc0ad227b380 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -29,6 +29,28 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, return 0; } +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size) +{ + int ret; + + ret = vgic_check_ioaddr(kvm, &ioaddr, addr, alignment); + if (ret) + return ret; + + if (!IS_ALIGNED(size, alignment)) + return -EINVAL; + + if (addr + size < addr) + return -EINVAL; + + if (addr + size > kvm_phys_size(kvm)) + return -E2BIG; + + return 0; +} + static int vgic_check_type(struct kvm *kvm, int type_needed) { if (kvm->arch.vgic.vgic_model != type_needed) diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 14a9218641f5..4be01c38e8f1 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -175,6 +175,10 @@ void vgic_irq_handle_resampling(struct vgic_irq *irq, int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment); +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size); + void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); -- cgit v1.2.3-58-ga151 From 4612d98f58c73ad63928200fd332f75c8e524dae Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:12 -0700 Subject: KVM: arm64: vgic-v3: Check redist region is not above the VM IPA size Verify that the redistributor regions do not extend beyond the VM-specified IPA range (phys_size). This can happen when using KVM_VGIC_V3_ADDR_TYPE_REDIST or KVM_VGIC_V3_ADDR_TYPE_REDIST_REGIONS with: base + size > phys_size AND base < phys_size Add the missing check into vgic_v3_alloc_redist_region() which is called when setting the regions, and into vgic_v3_check_base() which is called when attempting the first vcpu-run. The vcpu-run check does not apply to KVM_VGIC_V3_ADDR_TYPE_REDIST_REGIONS because the regions size is known before the first vcpu-run. Note that using the REDIST_REGIONS API results in a different check, which already exists, at first vcpu run: that the number of redist regions is enough for all vcpus. Finally, this patch also enables some extra tests in vgic_v3_alloc_redist_region() by calculating "size" early for the legacy redist api: like checking that the REDIST region can fit all the already created vcpus. Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-3-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 6 ++++-- arch/arm64/kvm/vgic/vgic-v3.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a09cdc0b953c..a9642fc71fdf 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -796,7 +796,9 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, struct vgic_dist *d = &kvm->arch.vgic; struct vgic_redist_region *rdreg; struct list_head *rd_regions = &d->rd_regions; - size_t size = count * KVM_VGIC_V3_REDIST_SIZE; + int nr_vcpus = atomic_read(&kvm->online_vcpus); + size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE + : nr_vcpus * KVM_VGIC_V3_REDIST_SIZE; int ret; /* cross the end of memory ? */ @@ -840,7 +842,7 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, rdreg->base = VGIC_ADDR_UNDEF; - ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size); if (ret) goto free; diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 21a6207fb2ee..960f51a8691f 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -483,8 +483,10 @@ bool vgic_v3_check_base(struct kvm *kvm) return false; list_for_each_entry(rdreg, &d->rd_regions, list) { - if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < - rdreg->base) + size_t sz = vgic_v3_rd_region_size(kvm, rdreg); + + if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF, + rdreg->base, SZ_64K, sz)) return false; } -- cgit v1.2.3-58-ga151 From c56a87da0a7fa14180082249ac954c7ebc9e74e1 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:13 -0700 Subject: KVM: arm64: vgic-v2: Check cpu interface region is not above the VM IPA size Verify that the GICv2 CPU interface does not extend beyond the VM-specified IPA range (phys_size). base + size > phys_size AND base < phys_size Add the missing check into kvm_vgic_addr() which is called when setting the region. This patch also enables some superfluous checks for the distributor (vgic_check_ioaddr was enough as alignment == size for the distributors). Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-4-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index cc0ad227b380..08ae34b1a986 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -79,7 +79,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t *addr_ptr, alignment; + phys_addr_t *addr_ptr, alignment, size; u64 undef_value = VGIC_ADDR_UNDEF; mutex_lock(&kvm->lock); @@ -88,16 +88,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_4K; + size = KVM_VGIC_V2_DIST_SIZE; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_cpu_base; alignment = SZ_4K; + size = KVM_VGIC_V2_CPU_SIZE; break; case KVM_VGIC_V3_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_64K; + size = KVM_VGIC_V3_DIST_SIZE; break; case KVM_VGIC_V3_ADDR_TYPE_REDIST: { struct vgic_redist_region *rdreg; @@ -162,7 +165,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; if (write) { - r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); + r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size); if (!r) *addr_ptr = *addr; } else { -- cgit v1.2.3-58-ga151 From 2ec02f6c64f043a249850c835ca7975c3a155d8b Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:14 -0700 Subject: KVM: arm64: vgic-v3: Check ITS region is not above the VM IPA size Verify that the ITS region does not extend beyond the VM-specified IPA range (phys_size). base + size > phys_size AND base < phys_size Add the missing check into vgic_its_set_attr() which is called when setting the region. Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-5-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-its.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 61728c543eb9..ad55bb8cd30f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2710,8 +2710,8 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (copy_from_user(&addr, uaddr, sizeof(addr))) return -EFAULT; - ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, - addr, SZ_64K); + ret = vgic_check_iorange(dev->kvm, its->vgic_its_base, + addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE); if (ret) return ret; -- cgit v1.2.3-58-ga151 From 96e903896969679104c7fef2c776ed1b5b09584f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:15 -0700 Subject: KVM: arm64: vgic: Drop vgic_check_ioaddr() There are no more users of vgic_check_ioaddr(). Move its checks to vgic_check_iorange() and then remove it. Signed-off-by: Ricardo Koller Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-6-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 26 ++++---------------------- arch/arm64/kvm/vgic/vgic.h | 3 --- 2 files changed, 4 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 08ae34b1a986..0d000d2fe8d2 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -14,38 +14,20 @@ /* common helpers */ -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment) -{ - if (addr & ~kvm_phys_mask(kvm)) - return -E2BIG; - - if (!IS_ALIGNED(addr, alignment)) - return -EINVAL; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - - return 0; -} - int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, phys_addr_t addr, phys_addr_t alignment, phys_addr_t size) { - int ret; - - ret = vgic_check_ioaddr(kvm, &ioaddr, addr, alignment); - if (ret) - return ret; + if (!IS_VGIC_ADDR_UNDEF(ioaddr)) + return -EEXIST; - if (!IS_ALIGNED(size, alignment)) + if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment)) return -EINVAL; if (addr + size < addr) return -EINVAL; - if (addr + size > kvm_phys_size(kvm)) + if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) return -E2BIG; return 0; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 4be01c38e8f1..3fd6c86a7ef3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -172,9 +172,6 @@ void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment); - int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, phys_addr_t addr, phys_addr_t alignment, phys_addr_t size); -- cgit v1.2.3-58-ga151 From b6a68b97af23cc75781bed38221ce73144ac2e39 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Oct 2021 18:05:53 +0100 Subject: KVM: arm64: Allow KVM to be disabled from the command line Although KVM can be compiled out of the kernel, it cannot be disabled at runtime. Allow this possibility by introducing a new mode that will prevent KVM from initialising. This is useful in the (limited) circumstances where you don't want KVM to be available (what is wrong with you?), or when you want to install another hypervisor instead (good luck with that). Reviewed-by: David Brazdil Acked-by: Will Deacon Acked-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Reviewed-by: Andrew Scull Link: https://lore.kernel.org/r/20211001170553.3062988-1-maz@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 14 +++++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 91ba391f9b32..f268731a3d4d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2365,6 +2365,8 @@ kvm-arm.mode= [KVM,ARM] Select one of KVM/arm64's modes of operation. + none: Forcefully disable KVM. + nvhe: Standard nVHE-based mode, without support for protected guests. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f8be56d5342b..019490c67976 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,6 +58,7 @@ enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED, + KVM_MODE_NONE, }; enum kvm_mode kvm_get_mode(void); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..658171231af9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2064,6 +2064,11 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + if (kvm_get_mode() == KVM_MODE_NONE) { + kvm_info("KVM disabled from command line\n"); + return -ENODEV; + } + in_hyp_mode = is_kernel_in_hyp_mode(); if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || @@ -2137,8 +2142,15 @@ static int __init early_kvm_mode_cfg(char *arg) return 0; } - if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) + if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) { + kvm_mode = KVM_MODE_DEFAULT; return 0; + } + + if (strcmp(arg, "none") == 0) { + kvm_mode = KVM_MODE_NONE; + return 0; + } return -EINVAL; } -- cgit v1.2.3-58-ga151 From c8f1e96734069c788b10545f4fd82bcbb6b55dfa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 Sep 2021 15:22:30 -0700 Subject: KVM: arm64: Unconditionally include generic KVM's Kconfig Unconditionally "source" the generic KVM Kconfig instead of wrapping it with KVM=y. A future patch will select HAVE_KVM so that referencing HAVE_KVM in common kernel code doesn't break, and because KVM=y and HAVE_KVM=n is weird. Source the generic KVM Kconfig unconditionally so that HAVE_KVM and KVM don't end up with a circular dependency. Note, all but one of generic KVM's "configs" are of the HAVE_XYZ nature, and the one outlier correctly takes a dependency on CONFIG_KVM, i.e. the generic Kconfig is intended to be included unconditionally. No functional change intended. Signed-off-by: Sean Christopherson [maz: made NVHE_EL2_DEBUG depend on KVM] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210921222231.518092-2-seanjc@google.com --- arch/arm64/kvm/Kconfig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d7eec0b43744..1170b20d68a7 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -4,6 +4,7 @@ # source "virt/lib/Kconfig" +source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -43,12 +44,9 @@ menuconfig KVM If unsure, say N. -if KVM - -source "virt/kvm/Kconfig" - config NVHE_EL2_DEBUG bool "Debug mode for non-VHE EL2 object" + depends on KVM help Say Y here to enable the debug mode for the non-VHE KVM EL2 object. Failure reports will BUG() in the hypervisor. This is intended for @@ -56,6 +54,4 @@ config NVHE_EL2_DEBUG If unsure, say N. -endif # KVM - endif # VIRTUALIZATION -- cgit v1.2.3-58-ga151 From e26bb75aa2f17fc079e6a24dff653b098e1f5d37 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 Sep 2021 15:22:31 -0700 Subject: KVM: arm64: Depend on HAVE_KVM instead of OF Select HAVE_KVM at all times on arm64, as the OF requirement is always there (even in the case of an ACPI system, we still depend on some of the OF infrastructure), and won't fo away. No functional change intended. Signed-off-by: Sean Christopherson Acked-by: Will Deacon [maz: Drop the "HAVE_KVM if OF" dependency, as OF is always there on arm64, new commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210921222231.518092-3-seanjc@google.com --- arch/arm64/Kconfig | 1 + arch/arm64/kvm/Kconfig | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..0d921d21fd35 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -186,6 +186,7 @@ config ARM64 select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KVM select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1170b20d68a7..8ffcbe29395e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -20,7 +20,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" - depends on OF + depends on HAVE_KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3-58-ga151 From 00d5101b254b77c35a8d55fe46331b19192866f3 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 11 Oct 2021 11:58:38 +0100 Subject: KVM: arm64: Return early from read_id_reg() if register is RAZ If read_id_reg() is called for an ID register which is Read-As-Zero (RAZ), it initializes the return value to zero, then goes through a list of registers which require special handling before returning the final value. By not returning as soon as it checks that the register should be RAZ, the function creates the opportunity for bugs, if, for example, a patch changes a register to RAZ (like has happened with PMSWINC_EL0 in commit 11663111cd49), but doesn't remove the special handling from read_id_reg(); or if a register is RAZ in certain situations, but readable in others. Return early to make it impossible for a RAZ register to be anything other than zero. Reviewed-by: Andrew Jones Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211011105840.155815-2-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e..4adda8bf3168 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1064,7 +1064,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) { u32 id = reg_to_encoding(r); - u64 val = raz ? 0 : read_sanitised_ftr_reg(id); + u64 val; + + if (raz) + return 0; + + val = read_sanitised_ftr_reg(id); switch (id) { case SYS_ID_AA64PFR0_EL1: -- cgit v1.2.3-58-ga151 From 5a4309762356f05df4c92629e5df15ab75c42c0d Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 11 Oct 2021 11:58:39 +0100 Subject: KVM: arm64: Use get_raz_reg() for userspace reads of PMSWINC_EL0 PMSWINC_EL0 is a write-only register and was initially part of the VCPU register state, but was later removed in commit 7a3ba3095a32 ("KVM: arm64: Remove PMSWINC_EL0 shadow register"). To prevent regressions, the register was kept accessible from userspace as Read-As-Zero (RAZ). The read function that is used to handle userspace reads of this register is get_raz_id_reg(), which, while technically correct, as it returns 0, it is not semantically correct, as PMSWINC_EL0 is not an ID register as the function name suggests. Add a new function, get_raz_reg(), to use it as the accessor for PMSWINC_EL0, as to not conflate get_raz_id_reg() to handle other types of registers. No functional change intended. Signed-off-by: Alexandru Elisei Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211011105840.155815-3-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4adda8bf3168..1be827740f87 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1285,6 +1285,15 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, true); } +static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + const u64 id = sys_reg_to_index(rd); + const u64 val = 0; + + return reg_to_user(uaddr, &val, id); +} + static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { @@ -1647,7 +1656,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { * previously (and pointlessly) advertised in the past... */ { PMU_SYS_REG(SYS_PMSWINC_EL0), - .get_user = get_raz_id_reg, .set_user = set_wi_reg, + .get_user = get_raz_reg, .set_user = set_wi_reg, .access = access_pmswinc, .reset = NULL }, { PMU_SYS_REG(SYS_PMSELR_EL0), .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, -- cgit v1.2.3-58-ga151 From ebf6aa8c047352fe43b1e20e580f12d5564da28e Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 11 Oct 2021 11:58:40 +0100 Subject: KVM: arm64: Replace get_raz_id_reg() with get_raz_reg() Reading a RAZ ID register isn't different from reading any other RAZ register, so get rid of get_raz_id_reg() and replace it with get_raz_reg(), which does the same thing, but does it without going through two layers of indirection. No functional change. Suggested-by: Andrew Jones Signed-off-by: Alexandru Elisei Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211011105840.155815-4-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1be827740f87..3aff06aafd0c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1273,12 +1273,6 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, raz); } -static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) -{ - return __get_id_reg(vcpu, rd, uaddr, true); -} - static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { @@ -1402,7 +1396,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, #define ID_UNALLOCATED(crm, op2) { \ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ .access = access_raz_id_reg, \ - .get_user = get_raz_id_reg, \ + .get_user = get_raz_reg, \ .set_user = set_raz_id_reg, \ } @@ -1414,7 +1408,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, #define ID_HIDDEN(name) { \ SYS_DESC(SYS_##name), \ .access = access_raz_id_reg, \ - .get_user = get_raz_id_reg, \ + .get_user = get_raz_reg, \ .set_user = set_raz_id_reg, \ } -- cgit v1.2.3-58-ga151 From 7dd9b5a157485ae8c48f76f087b1867ace016613 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 15:56:26 +0100 Subject: KVM: arm64: Move __get_fault_info() and co into their own include file In order to avoid including the whole of the switching helpers in unrelated files, move the __get_fault_info() and related helpers into their own include file. Signed-off-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20211010145636.1950948-2-tabba@google.com --- arch/arm64/kvm/hyp/include/hyp/fault.h | 75 +++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 61 +-------------------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 3 files changed, 77 insertions(+), 61 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/hyp/fault.h (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h new file mode 100644 index 000000000000..1b8a2dcd712f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_FAULT_H__ +#define __ARM64_KVM_HYP_FAULT_H__ + +#include +#include +#include +#include + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg_par(); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg_par(); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar, far; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + fault->far_el2 = far; + fault->hpfar_el2 = hpfar; + return true; +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0e78a6027be..54abc8298ec3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -8,6 +8,7 @@ #define __ARM64_KVM_HYP_SWITCH_H__ #include +#include #include #include @@ -133,66 +134,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) } } -static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg_par(); - if (!__kvm_at("s1e1r", far)) - tmp = read_sysreg_par(); - else - tmp = SYS_PAR_EL1_F; /* back to the guest */ - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) -{ - u64 hpfar, far; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; - return true; -} - static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) { u8 ec; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eac..2a07d63b8498 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include -- cgit v1.2.3-58-ga151 From cc1e6fdfa92b82902883b70dafa729d3bd427b80 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 15:56:27 +0100 Subject: KVM: arm64: Don't include switch.h into nvhe/kvm-main.c hyp-main.c includes switch.h while it only requires adjust-pc.h. Fix it to remove an unnecessary dependency. Signed-off-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20211010145636.1950948-3-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2da6aa8da868..8ca1104f4774 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -4,7 +4,7 @@ * Author: Andrew Scull */ -#include +#include #include #include -- cgit v1.2.3-58-ga151 From 8fb2046180a0ad347f2e5bcae760dca67e65aa73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 15:56:28 +0100 Subject: KVM: arm64: Move early handlers to per-EC handlers Simplify the early exception handling by slicing the gigantic decoding tree into a more manageable set of functions, similar to what we have in handle_exit.c. This will also make the structure reusable for pKVM's own early exit handling. Signed-off-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20211010145636.1950948-4-tabba@google.com --- arch/arm64/kvm/hyp/include/hyp/switch.h | 160 ++++++++++++++++++-------------- arch/arm64/kvm/hyp/nvhe/switch.c | 16 ++++ arch/arm64/kvm/hyp/vhe/switch.c | 16 ++++ 3 files changed, 124 insertions(+), 68 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 54abc8298ec3..1e4177322be7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -136,16 +136,7 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) { - u8 ec; - u64 esr; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - return __get_fault_info(esr, &vcpu->arch.fault); + return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) @@ -166,8 +157,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +/* + * We trap the first access to the FP/SIMD to save the host context and + * restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an undefined + * instruction exception to the guest. Similarly for trapped SVE accesses. + */ +static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { bool sve_guest, sve_host; u8 esr_ec; @@ -185,9 +181,6 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) } esr_ec = kvm_vcpu_trap_get_class(vcpu); - if (esr_ec != ESR_ELx_EC_FP_ASIMD && - esr_ec != ESR_ELx_EC_SVE) - return false; /* Don't handle SVE traps for non-SVE vcpus here: */ if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD) @@ -325,7 +318,7 @@ static inline bool esr_is_ptrauth_trap(u32 esr) DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); -static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) { struct kvm_cpu_context *ctxt; u64 val; @@ -350,6 +343,87 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) return true; } +static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + handle_tx2_tvm(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + return false; +} + +static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + return false; +} + +static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + return false; +} + +static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_abt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + } + } + + return false; +} + +typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); + +static const exit_handler_fn *kvm_get_exit_handler_array(void); + +/* + * Allow the hypervisor to handle the exit with an exit handler if it has one. + * + * Returns true if the hypervisor handled the exit, and control should go back + * to the guest, or false if it hasn't. + */ +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const exit_handler_fn *handlers = kvm_get_exit_handler_array(); + exit_handler_fn fn; + + fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; + + if (fn) + return fn(vcpu, exit_code); + + return false; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -384,59 +458,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (*exit_code != ARM_EXCEPTION_TRAP) goto exit; - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) + /* Check if there's an exit handler and allow it to handle the exit. */ + if (kvm_hyp_handle_exit(vcpu, exit_code)) goto guest; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. - */ - if (__hyp_handle_fpsimd(vcpu)) - goto guest; - - if (__hyp_handle_ptrauth(vcpu)) - goto guest; - - if (!__populate_fault_info(vcpu)) - goto guest; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_abt_issea(vcpu) && - !kvm_vcpu_abt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - goto guest; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - goto guest; - } - exit: /* Return to the host kernel and handle the exit */ return false; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a34b01cc8ab9..4f3992a1aabd 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -158,6 +158,22 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } +static const exit_handler_fn hyp_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn *kvm_get_exit_handler_array(void) +{ + return hyp_exit_handlers; +} + /* Switch to the guest for legacy non-VHE systems */ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index ded2c66675f0..9aedc8afc8b9 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -96,6 +96,22 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) __deactivate_traps_common(vcpu); } +static const exit_handler_fn hyp_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn *kvm_get_exit_handler_array(void) +{ + return hyp_exit_handlers; +} + /* Switch to the guest for VHE systems running in EL2 */ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { -- cgit v1.2.3-58-ga151 From 3b1a690eda0dc1891e8fc93991b122bff6fabf8c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:29 +0100 Subject: KVM: arm64: Pass struct kvm to per-EC handlers We need struct kvm to check for protected VMs to be able to pick the right handlers for them in subsequent patches. Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-5-tabba@google.com --- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1e4177322be7..481399bf9b94 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -403,7 +403,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(void); +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm); /* * Allow the hypervisor to handle the exit with an exit handler if it has one. @@ -413,7 +413,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(void); */ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(); + const exit_handler_fn *handlers = kvm_get_exit_handler_array(kern_hyp_va(vcpu->kvm)); exit_handler_fn fn; fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 4f3992a1aabd..8c9a0464be00 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -169,7 +169,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(void) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) { return hyp_exit_handlers; } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 9aedc8afc8b9..f6fb97accf65 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -107,7 +107,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(void) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) { return hyp_exit_handlers; } -- cgit v1.2.3-58-ga151 From 53868390778270f2890621f4498a53587719a3ff Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:30 +0100 Subject: KVM: arm64: Add missing field descriptor for MDCR_EL2 It's not currently used. Added for completeness. No functional change intended. Suggested-by: Marc Zyngier Signed-off-by: Fuad Tabba Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-6-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 327120c0089f..a39fcf318c77 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -295,6 +295,7 @@ #define MDCR_EL2_HPMFZO (UL(1) << 29) #define MDCR_EL2_MTPME (UL(1) << 28) #define MDCR_EL2_TDCC (UL(1) << 27) +#define MDCR_EL2_HLP (UL(1) << 26) #define MDCR_EL2_HCCD (UL(1) << 23) #define MDCR_EL2_TTRF (UL(1) << 19) #define MDCR_EL2_HPMD (UL(1) << 17) -- cgit v1.2.3-58-ga151 From 16dd1fbb12f72effcd3539561c2a94aed3ab6581 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:31 +0100 Subject: KVM: arm64: Simplify masking out MTE in feature id reg Simplify code for hiding MTE support in feature id register when MTE is not enabled/supported by KVM. No functional change intended. Signed-off-by: Fuad Tabba Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-7-tabba@google.com --- arch/arm64/kvm/sys_regs.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e..447acce9ca84 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1077,14 +1077,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); break; case SYS_ID_AA64PFR1_EL1: - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); - if (kvm_has_mte(vcpu->kvm)) { - u64 pfr, mte; - - pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); - mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte); - } + if (!kvm_has_mte(vcpu->kvm)) + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) -- cgit v1.2.3-58-ga151 From 6c30bfb18d0b7d09593f204c936493cfcd153956 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:32 +0100 Subject: KVM: arm64: Add handlers for protected VM System Registers Add system register handlers for protected VMs. These cover Sys64 registers (including feature id registers), and debug. No functional change intended as these are not hooked in yet to the guest exit handlers introduced earlier. So when trapping is triggered, the exit handlers let the host handle it, as before. Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-8-tabba@google.com --- arch/arm64/include/asm/kvm_fixed_config.h | 195 +++++++++++ arch/arm64/include/asm/kvm_hyp.h | 5 + arch/arm64/kvm/arm.c | 5 + arch/arm64/kvm/hyp/include/nvhe/sys_regs.h | 29 ++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/setup.c | 3 + arch/arm64/kvm/hyp/nvhe/switch.c | 1 + arch/arm64/kvm/hyp/nvhe/sys_regs.c | 498 +++++++++++++++++++++++++++++ 8 files changed, 737 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/kvm_fixed_config.h create mode 100644 arch/arm64/kvm/hyp/include/nvhe/sys_regs.h create mode 100644 arch/arm64/kvm/hyp/nvhe/sys_regs.c (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_fixed_config.h b/arch/arm64/include/asm/kvm_fixed_config.h new file mode 100644 index 000000000000..0ed06923f7e9 --- /dev/null +++ b/arch/arm64/include/asm/kvm_fixed_config.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_FIXED_CONFIG_H__ +#define __ARM64_KVM_FIXED_CONFIG_H__ + +#include + +/* + * This file contains definitions for features to be allowed or restricted for + * guest virtual machines, depending on the mode KVM is running in and on the + * type of guest that is running. + * + * The ALLOW masks represent a bitmask of feature fields that are allowed + * without any restrictions as long as they are supported by the system. + * + * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for + * features that are restricted to support at most the specified feature. + * + * If a feature field is not present in either, than it is not supported. + * + * The approach taken for protected VMs is to allow features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ + +/* + * Allow for protected VMs: + * - Floating-point and Advanced SIMD + * - Data Independent Timing + */ +#define PVM_ID_AA64PFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - AArch64 guests only (no support for AArch32 guests): + * AArch32 adds complexity in trap handling, emulation, condition codes, + * etc... + * - RAS (v1) + * Supported by KVM + */ +#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ + ) + +/* + * Allow for protected VMs: + * - Branch Target Identification + * - Speculative Store Bypassing + */ +#define PVM_ID_AA64PFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ + ) + +/* + * Allow for protected VMs: + * - Mixed-endian + * - Distinction between Secure and Non-secure Memory + * - Mixed-endian at EL0 only + * - Non-context synchronizing exception entry and exit + */ +#define PVM_ID_AA64MMFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - 40-bit IPA + * - 16-bit ASID + */ +#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ + ) + +/* + * Allow for protected VMs: + * - Hardware translation table updates to Access flag and Dirty state + * - Number of VMID bits from CPU + * - Hierarchical Permission Disables + * - Privileged Access Never + * - SError interrupt exceptions from speculative reads + * - Enhanced Translation Synchronization + */ +#define PVM_ID_AA64MMFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ + ) + +/* + * Allow for protected VMs: + * - Common not Private translations + * - User Access Override + * - IESB bit in the SCTLR_ELx registers + * - Unaligned single-copy atomicity and atomic functions + * - ESR_ELx.EC value on an exception by read access to feature ID space + * - TTL field in address operations. + * - Break-before-make sequences when changing translation block size + * - E0PDx mechanism + */ +#define PVM_ID_AA64MMFR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ + ) + +/* + * No support for Scalable Vectors for protected VMs: + * Requires additional support from KVM, e.g., context-switching and + * trapping at EL2 + */ +#define PVM_ID_AA64ZFR0_ALLOW (0ULL) + +/* + * No support for debug, including breakpoints, and watchpoints for protected + * VMs: + * The Arm architecture mandates support for at least the Armv8 debug + * architecture, which would include at least 2 hardware breakpoints and + * watchpoints. Providing that support to protected guests adds + * considerable state and complexity. Therefore, the reserved value of 0 is + * used for debug-related fields. + */ +#define PVM_ID_AA64DFR0_ALLOW (0ULL) +#define PVM_ID_AA64DFR1_ALLOW (0ULL) + +/* + * No support for implementation defined features. + */ +#define PVM_ID_AA64AFR0_ALLOW (0ULL) +#define PVM_ID_AA64AFR1_ALLOW (0ULL) + +/* + * No restrictions on instructions implemented in AArch64. + */ +#define PVM_ID_AA64ISAR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ + ) + +#define PVM_ID_AA64ISAR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ) + +#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 657d0c94cf82..5afd14ab15b9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -115,7 +115,12 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); #endif +extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..6aa7b0c5bf21 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1802,8 +1802,13 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) void *addr = phys_to_virt(hyp_mem_base); int ret; + kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); + kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) diff --git a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h new file mode 100644 index 000000000000..3288128738aa --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_NVHE_SYS_REGS_H__ +#define __ARM64_KVM_NVHE_SYS_REGS_H__ + +#include + +u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu); + +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +int kvm_check_pvm_sysreg_table(void); +void inject_undef64(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_NVHE_SYS_REGS_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 8d741f71377f..0bbe37a18d5d 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o setup.o mm.o mem_protect.o + cache.o setup.o mm.o mem_protect.o sys_regs.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 57c27846320f..c85ff64e63f2 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include struct hyp_pool hpool; @@ -260,6 +261,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void (*fn)(phys_addr_t params_pa, void *finalize_fn_va); int ret; + BUG_ON(kvm_check_pvm_sysreg_table()); + if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) return -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8c9a0464be00..17d1a9512507 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -28,6 +28,7 @@ #include #include +#include /* Non-VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c new file mode 100644 index 000000000000..6e3ea49af302 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#include +#include +#include + +#include + +#include + +#include "../../sys_regs.h" + +/* + * Copies of the host's CPU features registers holding sanitized values at hyp. + */ +u64 id_aa64pfr0_el1_sys_val; +u64 id_aa64pfr1_el1_sys_val; +u64 id_aa64isar0_el1_sys_val; +u64 id_aa64isar1_el1_sys_val; +u64 id_aa64mmfr2_el1_sys_val; + +/* + * Inject an unknown/undefined exception to an AArch64 guest while most of its + * sysregs are live. + */ +void inject_undef64(struct kvm_vcpu *vcpu) +{ + u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); + + __kvm_adjust_pc(vcpu); + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); +} + +/* + * Returns the restricted features values of the feature register based on the + * limitations in restrict_fields. + * A feature id field value of 0b0000 does not impose any restrictions. + * Note: Use only for unsigned feature field values. + */ +static u64 get_restricted_features_unsigned(u64 sys_reg_val, + u64 restrict_fields) +{ + u64 value = 0UL; + u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + /* + * According to the Arm Architecture Reference Manual, feature fields + * use increasing values to indicate increases in functionality. + * Iterate over the restricted feature fields and calculate the minimum + * unsigned value between the one supported by the system, and what the + * value is being restricted to. + */ + while (sys_reg_val && restrict_fields) { + value |= min(sys_reg_val & mask, restrict_fields & mask); + sys_reg_val &= ~mask; + restrict_fields &= ~mask; + mask <<= ARM64_FEATURE_FIELD_BITS; + } + + return value; +} + +/* + * Functions that return the value of feature id registers for protected VMs + * based on allowed features, system features, and KVM support. + */ + +u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) +{ + const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); + u64 set_mask = 0; + u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; + + if (!vcpu_has_sve(vcpu)) + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); + + set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); + + /* Spectre and Meltdown mitigation in KVM */ + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), + (u64)kvm->arch.pfr0_csv2); + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), + (u64)kvm->arch.pfr0_csv3); + + return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; +} + +u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) +{ + const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); + u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; + + if (!kvm_has_mte(kvm)) + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + + return id_aa64pfr1_el1_sys_val & allow_mask; +} + +u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for Scalable Vectors, therefore, hyp has no sanitized + * copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for debug, including breakpoints, and watchpoints, + * therefore, pKVM has no sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) +{ + /* + * No support for debug, therefore, hyp has no sanitized copy of the + * feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for implementation defined features, therefore, hyp has no + * sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) +{ + /* + * No support for implementation defined features, therefore, hyp has no + * sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) +{ + return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; +} + +u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + + return id_aa64isar1_el1_sys_val & allow_mask; +} + +u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) +{ + u64 set_mask; + + set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val, + PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED); + + return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; +} + +u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) +{ + return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; +} + +u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) +{ + return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; +} + +/* Read a sanitized cpufeature ID register by its sys_reg_desc. */ +static u64 read_id_reg(const struct kvm_vcpu *vcpu, + struct sys_reg_desc const *r) +{ + u32 id = reg_to_encoding(r); + + switch (id) { + case SYS_ID_AA64PFR0_EL1: + return get_pvm_id_aa64pfr0(vcpu); + case SYS_ID_AA64PFR1_EL1: + return get_pvm_id_aa64pfr1(vcpu); + case SYS_ID_AA64ZFR0_EL1: + return get_pvm_id_aa64zfr0(vcpu); + case SYS_ID_AA64DFR0_EL1: + return get_pvm_id_aa64dfr0(vcpu); + case SYS_ID_AA64DFR1_EL1: + return get_pvm_id_aa64dfr1(vcpu); + case SYS_ID_AA64AFR0_EL1: + return get_pvm_id_aa64afr0(vcpu); + case SYS_ID_AA64AFR1_EL1: + return get_pvm_id_aa64afr1(vcpu); + case SYS_ID_AA64ISAR0_EL1: + return get_pvm_id_aa64isar0(vcpu); + case SYS_ID_AA64ISAR1_EL1: + return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64MMFR0_EL1: + return get_pvm_id_aa64mmfr0(vcpu); + case SYS_ID_AA64MMFR1_EL1: + return get_pvm_id_aa64mmfr1(vcpu); + case SYS_ID_AA64MMFR2_EL1: + return get_pvm_id_aa64mmfr2(vcpu); + default: + /* + * Should never happen because all cases are covered in + * pvm_sys_reg_descs[]. + */ + WARN_ON(1); + break; + } + + return 0; +} + +/* + * Accessor for AArch32 feature id registers. + * + * The value of these registers is "unknown" according to the spec if AArch32 + * isn't supported. + */ +static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + inject_undef64(vcpu); + return false; + } + + /* + * No support for AArch32 guests, therefore, pKVM has no sanitized copy + * of AArch32 feature id registers. + */ + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY); + + /* Use 0 for architecturally "unknown" values. */ + p->regval = 0; + return true; +} + +/* + * Accessor for AArch64 feature id registers. + * + * If access is allowed, set the regval to the protected VM's view of the + * register and return true. + * Otherwise, inject an undefined exception and return false. + */ +static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + inject_undef64(vcpu); + return false; + } + + p->regval = read_id_reg(vcpu, r); + return true; +} + +/* Mark the specified system register as an AArch32 feature id register. */ +#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 } + +/* Mark the specified system register as an AArch64 feature id register. */ +#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } + +/* Mark the specified system register as not being handled in hyp. */ +#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL } + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * NOTE: Anything not explicitly listed here is *restricted by default*, i.e., + * it will lead to injecting an exception into the guest. + */ +static const struct sys_reg_desc pvm_sys_reg_descs[] = { + /* Cache maintenance by set/way operations are restricted. */ + + /* Debug and Trace Registers are restricted. */ + + /* AArch64 mappings of the AArch32 ID registers */ + /* CRm=1 */ + AARCH32(SYS_ID_PFR0_EL1), + AARCH32(SYS_ID_PFR1_EL1), + AARCH32(SYS_ID_DFR0_EL1), + AARCH32(SYS_ID_AFR0_EL1), + AARCH32(SYS_ID_MMFR0_EL1), + AARCH32(SYS_ID_MMFR1_EL1), + AARCH32(SYS_ID_MMFR2_EL1), + AARCH32(SYS_ID_MMFR3_EL1), + + /* CRm=2 */ + AARCH32(SYS_ID_ISAR0_EL1), + AARCH32(SYS_ID_ISAR1_EL1), + AARCH32(SYS_ID_ISAR2_EL1), + AARCH32(SYS_ID_ISAR3_EL1), + AARCH32(SYS_ID_ISAR4_EL1), + AARCH32(SYS_ID_ISAR5_EL1), + AARCH32(SYS_ID_MMFR4_EL1), + AARCH32(SYS_ID_ISAR6_EL1), + + /* CRm=3 */ + AARCH32(SYS_MVFR0_EL1), + AARCH32(SYS_MVFR1_EL1), + AARCH32(SYS_MVFR2_EL1), + AARCH32(SYS_ID_PFR2_EL1), + AARCH32(SYS_ID_DFR1_EL1), + AARCH32(SYS_ID_MMFR5_EL1), + + /* AArch64 ID registers */ + /* CRm=4 */ + AARCH64(SYS_ID_AA64PFR0_EL1), + AARCH64(SYS_ID_AA64PFR1_EL1), + AARCH64(SYS_ID_AA64ZFR0_EL1), + AARCH64(SYS_ID_AA64DFR0_EL1), + AARCH64(SYS_ID_AA64DFR1_EL1), + AARCH64(SYS_ID_AA64AFR0_EL1), + AARCH64(SYS_ID_AA64AFR1_EL1), + AARCH64(SYS_ID_AA64ISAR0_EL1), + AARCH64(SYS_ID_AA64ISAR1_EL1), + AARCH64(SYS_ID_AA64MMFR0_EL1), + AARCH64(SYS_ID_AA64MMFR1_EL1), + AARCH64(SYS_ID_AA64MMFR2_EL1), + + HOST_HANDLED(SYS_SCTLR_EL1), + HOST_HANDLED(SYS_ACTLR_EL1), + HOST_HANDLED(SYS_CPACR_EL1), + + HOST_HANDLED(SYS_RGSR_EL1), + HOST_HANDLED(SYS_GCR_EL1), + + /* Scalable Vector Registers are restricted. */ + + HOST_HANDLED(SYS_TTBR0_EL1), + HOST_HANDLED(SYS_TTBR1_EL1), + HOST_HANDLED(SYS_TCR_EL1), + + HOST_HANDLED(SYS_APIAKEYLO_EL1), + HOST_HANDLED(SYS_APIAKEYHI_EL1), + HOST_HANDLED(SYS_APIBKEYLO_EL1), + HOST_HANDLED(SYS_APIBKEYHI_EL1), + HOST_HANDLED(SYS_APDAKEYLO_EL1), + HOST_HANDLED(SYS_APDAKEYHI_EL1), + HOST_HANDLED(SYS_APDBKEYLO_EL1), + HOST_HANDLED(SYS_APDBKEYHI_EL1), + HOST_HANDLED(SYS_APGAKEYLO_EL1), + HOST_HANDLED(SYS_APGAKEYHI_EL1), + + HOST_HANDLED(SYS_AFSR0_EL1), + HOST_HANDLED(SYS_AFSR1_EL1), + HOST_HANDLED(SYS_ESR_EL1), + + HOST_HANDLED(SYS_ERRIDR_EL1), + HOST_HANDLED(SYS_ERRSELR_EL1), + HOST_HANDLED(SYS_ERXFR_EL1), + HOST_HANDLED(SYS_ERXCTLR_EL1), + HOST_HANDLED(SYS_ERXSTATUS_EL1), + HOST_HANDLED(SYS_ERXADDR_EL1), + HOST_HANDLED(SYS_ERXMISC0_EL1), + HOST_HANDLED(SYS_ERXMISC1_EL1), + + HOST_HANDLED(SYS_TFSR_EL1), + HOST_HANDLED(SYS_TFSRE0_EL1), + + HOST_HANDLED(SYS_FAR_EL1), + HOST_HANDLED(SYS_PAR_EL1), + + /* Performance Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_MAIR_EL1), + HOST_HANDLED(SYS_AMAIR_EL1), + + /* Limited Ordering Regions Registers are restricted. */ + + HOST_HANDLED(SYS_VBAR_EL1), + HOST_HANDLED(SYS_DISR_EL1), + + /* GIC CPU Interface registers are restricted. */ + + HOST_HANDLED(SYS_CONTEXTIDR_EL1), + HOST_HANDLED(SYS_TPIDR_EL1), + + HOST_HANDLED(SYS_SCXTNUM_EL1), + + HOST_HANDLED(SYS_CNTKCTL_EL1), + + HOST_HANDLED(SYS_CCSIDR_EL1), + HOST_HANDLED(SYS_CLIDR_EL1), + HOST_HANDLED(SYS_CSSELR_EL1), + HOST_HANDLED(SYS_CTR_EL0), + + /* Performance Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_TPIDR_EL0), + HOST_HANDLED(SYS_TPIDRRO_EL0), + + HOST_HANDLED(SYS_SCXTNUM_EL0), + + /* Activity Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_CNTP_TVAL_EL0), + HOST_HANDLED(SYS_CNTP_CTL_EL0), + HOST_HANDLED(SYS_CNTP_CVAL_EL0), + + /* Performance Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_DACR32_EL2), + HOST_HANDLED(SYS_IFSR32_EL2), + HOST_HANDLED(SYS_FPEXC32_EL2), +}; + +/* + * Checks that the sysreg table is unique and in-order. + * + * Returns 0 if the table is consistent, or 1 otherwise. + */ +int kvm_check_pvm_sysreg_table(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) { + if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0) + return 1; + } + + return 0; +} + +/* + * Handler for protected VM MSR, MRS or System instruction execution. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't, to be handled by the host. + */ +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const struct sys_reg_desc *r; + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int Rt = kvm_vcpu_sys_get_rt(vcpu); + + params = esr_sys64_to_params(esr); + params.regval = vcpu_get_reg(vcpu, Rt); + + r = find_reg(¶ms, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs)); + + /* Undefined (RESTRICTED). */ + if (r == NULL) { + inject_undef64(vcpu); + return true; + } + + /* Handled by the host (HOST_HANDLED) */ + if (r->access == NULL) + return false; + + /* Handled by hyp: skip instruction if instructed to do so. */ + if (r->access(vcpu, ¶ms, r)) + __kvm_skip_instr(vcpu); + + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); + + return true; +} -- cgit v1.2.3-58-ga151 From 2a0c343386ae1a6826e1b9d751bfc14f4711c2de Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:33 +0100 Subject: KVM: arm64: Initialize trap registers for protected VMs Protected VMs have more restricted features that need to be trapped. Moreover, the host should not be trusted to set the appropriate trapping registers and their values. Initialize the trapping registers, i.e., hcr_el2, mdcr_el2, and cptr_el2 at EL2 for protected guests, based on the values of the guest's feature id registers. No functional change intended as trap handlers introduced in the previous patch are still not hooked in to the guest exit handlers. Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-9-tabba@google.com --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/kvm/arm.c | 8 ++ arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 2 + arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 186 +++++++++++++++++++++++++ 7 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/pkvm.c (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..a460e1243cef 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -64,6 +64,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 #define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 +#define __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps 21 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f8be56d5342b..4a323aa27a6b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -780,6 +780,8 @@ static inline bool kvm_vm_is_protected(struct kvm *kvm) return false; } +void kvm_init_protected_traps(struct kvm_vcpu *vcpu); + int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6aa7b0c5bf21..3af6d59d1919 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -620,6 +620,14 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) ret = kvm_arm_pmu_v3_enable(vcpu); + /* + * Initialize traps for protected VMs. + * NOTE: Move to run in EL2 directly, rather than via a hypercall, once + * the code is in place for first run initialization at EL2. + */ + if (kvm_vm_is_protected(kvm)) + kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + return ret; } diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h index 1e6d995968a1..45a84f0ade04 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -15,4 +15,6 @@ #define DECLARE_REG(type, name, ctxt, reg) \ type name = (type)cpu_reg(ctxt, (reg)) +void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0bbe37a18d5d..c3c11974fa3b 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o setup.o mm.o mem_protect.o sys_regs.o + cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8ca1104f4774..a6303db09cd6 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -160,6 +160,14 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } + +static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + + __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -185,6 +193,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), + HANDLE_FUNC(__pkvm_vcpu_init_traps), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c new file mode 100644 index 000000000000..633547cc1033 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#include +#include +#include +#include +#include + +/* + * Set trap register values based on features in ID_AA64PFR0. + */ +static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64pfr0(vcpu); + u64 hcr_set = HCR_RW; + u64 hcr_clear = 0; + u64 cptr_set = 0; + + /* Protected KVM does not support AArch32 guests. */ + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + + /* + * Linux guests assume support for floating-point and Advanced SIMD. Do + * not change the trapping behavior for these from the KVM default. + */ + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + PVM_ID_AA64PFR0_ALLOW)); + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + PVM_ID_AA64PFR0_ALLOW)); + + /* Trap RAS unless all current versions are supported */ + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) < + ID_AA64PFR0_RAS_V1P1) { + hcr_set |= HCR_TERR | HCR_TEA; + hcr_clear |= HCR_FIEN; + } + + /* Trap AMU */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) { + hcr_clear |= HCR_AMVOFFEN; + cptr_set |= CPTR_EL2_TAM; + } + + /* Trap SVE */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids)) + cptr_set |= CPTR_EL2_TZ; + + vcpu->arch.hcr_el2 |= hcr_set; + vcpu->arch.hcr_el2 &= ~hcr_clear; + vcpu->arch.cptr_el2 |= cptr_set; +} + +/* + * Set trap register values based on features in ID_AA64PFR1. + */ +static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64pfr1(vcpu); + u64 hcr_set = 0; + u64 hcr_clear = 0; + + /* Memory Tagging: Trap and Treat as Untagged if not supported. */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) { + hcr_set |= HCR_TID5; + hcr_clear |= HCR_DCT | HCR_ATA; + } + + vcpu->arch.hcr_el2 |= hcr_set; + vcpu->arch.hcr_el2 &= ~hcr_clear; +} + +/* + * Set trap register values based on features in ID_AA64DFR0. + */ +static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64dfr0(vcpu); + u64 mdcr_set = 0; + u64 mdcr_clear = 0; + u64 cptr_set = 0; + + /* Trap/constrain PMU */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) { + mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; + mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | + MDCR_EL2_HPMN_MASK; + } + + /* Trap Debug */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids)) + mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE; + + /* Trap OS Double Lock */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids)) + mdcr_set |= MDCR_EL2_TDOSA; + + /* Trap SPE */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) { + mdcr_set |= MDCR_EL2_TPMS; + mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + } + + /* Trap Trace Filter */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids)) + mdcr_set |= MDCR_EL2_TTRF; + + /* Trap Trace */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids)) + cptr_set |= CPTR_EL2_TTA; + + vcpu->arch.mdcr_el2 |= mdcr_set; + vcpu->arch.mdcr_el2 &= ~mdcr_clear; + vcpu->arch.cptr_el2 |= cptr_set; +} + +/* + * Set trap register values based on features in ID_AA64MMFR0. + */ +static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64mmfr0(vcpu); + u64 mdcr_set = 0; + + /* Trap Debug Communications Channel registers */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids)) + mdcr_set |= MDCR_EL2_TDCC; + + vcpu->arch.mdcr_el2 |= mdcr_set; +} + +/* + * Set trap register values based on features in ID_AA64MMFR1. + */ +static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64mmfr1(vcpu); + u64 hcr_set = 0; + + /* Trap LOR */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids)) + hcr_set |= HCR_TLOR; + + vcpu->arch.hcr_el2 |= hcr_set; +} + +/* + * Set baseline trap register values. + */ +static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) +{ + const u64 hcr_trap_feat_regs = HCR_TID3; + const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1; + + /* + * Always trap: + * - Feature id registers: to control features exposed to guests + * - Implementation-defined features + */ + vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef; + + /* Clear res0 and set res1 bits to trap potential new features. */ + vcpu->arch.hcr_el2 &= ~(HCR_RES0); + vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0); + vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1; + vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0); +} + +/* + * Initialize trap register values for protected VMs. + */ +void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) +{ + pvm_init_trap_regs(vcpu); + pvm_init_traps_aa64pfr0(vcpu); + pvm_init_traps_aa64pfr1(vcpu); + pvm_init_traps_aa64dfr0(vcpu); + pvm_init_traps_aa64mmfr0(vcpu); + pvm_init_traps_aa64mmfr1(vcpu); +} -- cgit v1.2.3-58-ga151 From 72e1be120eaaf82a58c81fcf173cdb1d7a5dcfbb Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:34 +0100 Subject: KVM: arm64: Move sanitized copies of CPU features Move the sanitized copies of the CPU feature registers to the recently created sys_regs.c. This consolidates all copies in a more relevant file. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-10-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 6 ------ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2a07d63b8498..f6d96e60b323 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -25,12 +25,6 @@ struct host_kvm host_kvm; static struct hyp_pool host_s2_pool; -/* - * Copies of the host's CPU features registers holding sanitized values. - */ -u64 id_aa64mmfr0_el1_sys_val; -u64 id_aa64mmfr1_el1_sys_val; - const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 6e3ea49af302..6bde2dc5205c 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -21,6 +21,8 @@ u64 id_aa64pfr0_el1_sys_val; u64 id_aa64pfr1_el1_sys_val; u64 id_aa64isar0_el1_sys_val; u64 id_aa64isar1_el1_sys_val; +u64 id_aa64mmfr0_el1_sys_val; +u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; /* -- cgit v1.2.3-58-ga151 From 1423afcb411780c7a6a68f801fdcfb6920ad6f06 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:35 +0100 Subject: KVM: arm64: Trap access to pVM restricted features Trap accesses to restricted features for VMs running in protected mode. Access to feature registers are emulated, and only supported features are exposed to protected VMs. Accesses to restricted registers as well as restricted instructions are trapped, and an undefined exception is injected into the protected guests, i.e., with EC = 0x0 (unknown reason). This EC is the one used, according to the Arm Architecture Reference Manual, for unallocated or undefined system registers or instructions. Only affects the functionality of protected VMs. Otherwise, should not affect non-protected VMs when KVM is running in protected mode. Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-11-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 17d1a9512507..2c72c31e516e 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,49 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } +/** + * Handler for protected VM restricted exceptions. + * + * Inject an undefined exception into the guest and return true to indicate that + * the hypervisor has handled the exit, and control should go back to the guest. + */ +static bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + inject_undef64(vcpu); + return true; +} + +/** + * Handler for protected VM MSR, MRS or System instruction execution in AArch64. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (kvm_handle_pvm_sysreg(vcpu, exit_code)) + return true; + + return kvm_hyp_handle_sysreg(vcpu, exit_code); +} + +/** + * Handler for protected floating-point and Advanced SIMD accesses. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* Linux guests assume support for floating-point and Advanced SIMD. */ + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + PVM_ID_AA64PFR0_ALLOW)); + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + PVM_ID_AA64PFR0_ALLOW)); + + return kvm_hyp_handle_fpsimd(vcpu, exit_code); +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -170,8 +214,21 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; +static const exit_handler_fn pvm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64, + [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted, + [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) { + if (unlikely(kvm_vm_is_protected(kvm))) + return pvm_exit_handlers; + return hyp_exit_handlers; } -- cgit v1.2.3-58-ga151 From 5f39efc42052b042c4d7ba6fd77934e8de43e10c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:36 +0100 Subject: KVM: arm64: Handle protected guests at 32 bits Protected KVM does not support protected AArch32 guests. However, it is possible for the guest to force run AArch32, potentially causing problems. Add an extra check so that if the hypervisor catches the guest doing that, it can prevent the guest from running again by resetting vcpu->arch.target and returning ARM_EXCEPTION_IL. If this were to happen, The VMM can try and fix it by re- initializing the vcpu with KVM_ARM_VCPU_INIT, however, this is likely not possible for protected VMs. Adapted from commit 22f553842b14 ("KVM: arm64: Handle Asymmetric AArch32 systems") Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-12-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 2c72c31e516e..f25b6353a598 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -232,6 +232,37 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) return hyp_exit_handlers; } +/* + * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. + * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a + * guest from dropping to AArch32 EL0 if implemented by the CPU. If the + * hypervisor spots a guest in such a state ensure it is handled, and don't + * trust the host to spot or fix it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + * + * Returns false if the guest ran in AArch32 when it shouldn't have, and + * thus should exit to the host, or true if a the guest run loop can continue. + */ +static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + + if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + /* + * As we have caught the guest red-handed, decide that it isn't + * fit for purpose anymore by making the vcpu invalid. The VMM + * can try and fix it by re-initializing the vcpu with + * KVM_ARM_VCPU_INIT, however, this is likely not possible for + * protected VMs. + */ + vcpu->arch.target = -1; + *exit_code = ARM_EXCEPTION_IL; + return false; + } + + return true; +} + /* Switch to the guest for legacy non-VHE systems */ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { @@ -294,6 +325,9 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* Jump in the fire! */ exit_code = __guest_enter(vcpu); + if (unlikely(!handle_aarch32_guest(vcpu, &exit_code))) + break; + /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); -- cgit v1.2.3-58-ga151 From 69adec18e94ff3ca20447916a3bd23ab1d06b878 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 12 Oct 2021 12:23:12 +0100 Subject: KVM: arm64: Fix reporting of endianess when the access originates at EL0 We currently check SCTLR_EL1.EE when computing the address of a faulting guest access. However, the fault could have occured at EL0, in which case the right bit to check would be SCTLR_EL1.E0E. This is pretty unlikely to cause any issue in practice: You'd have to have a guest with a LE EL1 and a BE EL0 (or the other way around), and have mapped a device into the EL0 page tables. Good luck with that! Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20211012112312.1247467-1-maz@kernel.org --- arch/arm64/include/asm/kvm_emulate.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fd418955e31e..f4871e47b2d0 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -396,7 +396,10 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); + if (vcpu_mode_priv(vcpu)) + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); + else + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, -- cgit v1.2.3-58-ga151 From 562e530fd7707aad7fed953692d1835612238966 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:06 +0100 Subject: KVM: arm64: Force ID_AA64PFR0_EL1.GIC=1 when exposing a virtual GICv3 Until now, we always let ID_AA64PFR0_EL1.GIC reflect the value visible on the host, even if we were running a GICv2-enabled VM on a GICv3+compat host. That's fine, but we also now have the case of a host that does not expose ID_AA64PFR0_EL1.GIC==1 despite having a vGIC. Yes, this is confusing. Thank you M1. Let's go back to first principles and expose ID_AA64PFR0_EL1.GIC=1 when a GICv3 is exposed to the guest. This also hides a GICv4.1 CPU interface from the guest which has no business knowing about the v4.1 extension. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-2-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e..0e8fc29df19c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1075,6 +1075,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + if (irqchip_in_kernel(vcpu->kvm) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1); + } break; case SYS_ID_AA64PFR1_EL1: val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); -- cgit v1.2.3-58-ga151 From df652bcf1136db7f16e486a204ba4b4fc4181759 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:07 +0100 Subject: KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors The infamous M1 has a feature nobody else ever implemented, in the form of the "GIC locally generated SError interrupts", also known as SEIS for short. These SErrors are generated when a guest does something that violates the GIC state machine. It would have been simpler to just *ignore* the damned thing, but that's not what this HW does. Oh well. This part of of the architecture is also amazingly under-specified. There is a whole 10 lines that describe the feature in a spec that is 930 pages long, and some of these lines are factually wrong. Oh, and it is deprecated, so the insentive to clarify it is low. Now, the spec says that this should be a *virtual* SError when HCR_EL2.AMO is set. As it turns out, that's not always the case on this CPU, and the SError sometimes fires on the host as a physical SError. Goodbye, cruel world. This clearly is a HW bug, and it means that a guest can easily take the host down, on demand. Thankfully, we have seen systems that were just as broken in the past, and we have the perfect vaccine for it. Apple M1, please meet the Cavium ThunderX workaround. All your GIC accesses will be trapped, sanitised, and emulated. Only the signalling aspect of the HW will be used. It won't be super speedy, but it will at least be safe. You're most welcome. Given that this has only ever been seen on this single implementation, that the spec is unclear at best and that we cannot trust it to ever be implemented correctly, gate the workaround solely on ICH_VTR_EL2.SEIS being set. Tested-by: Joey Gouly Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-3-maz@kernel.org --- arch/arm64/kvm/vgic/vgic-v3.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 21a6207fb2ee..ae59e2580bf5 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -671,6 +671,14 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { + kvm_info("GICv3 with locally generated SEI\n"); + + group0_trap = true; + group1_trap = true; + common_trap = true; + } + if (group0_trap || group1_trap || common_trap) { kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", group0_trap ? "G0" : "", -- cgit v1.2.3-58-ga151 From 0924729b21bffdd0e13f29ea6256d299fc807cff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:08 +0100 Subject: KVM: arm64: vgic-v3: Reduce common group trapping to ICV_DIR_EL1 when possible On systems that advertise ICH_VTR_EL2.SEIS, we trap all GICv3 sysreg accesses from the guest. From a performance perspective, this is OK as long as the guest doesn't hammer the GICv3 CPU interface. In most cases, this is fine, unless the guest actively uses priorities and switches PMR_EL1 very often. Which is exactly what happens when a Linux guest runs with irqchip.gicv3_pseudo_nmi=1. In these condition, the performance plumets as we hit PMR each time we mask/unmask interrupts. Not good. There is however an opportunity for improvement. Careful reading of the architecture specification indicates that the only GICv3 sysreg belonging to the common group (which contains the SGI registers, PMR, DIR, CTLR and RPR) that is allowed to generate a SError is DIR. Everything else is safe. It is thus possible to substitute the trapping of all the common group with just that of DIR if it supported by the implementation. Yes, that's yet another optional bit of the architecture. So let's just do that, as it leads to some impressive result on the M1: Without this change: bash-5.1# /host/home/maz/hackbench 100 process 1000 Running with 100*40 (== 4000) tasks. Time: 56.596 With this change: bash-5.1# /host/home/maz/hackbench 100 process 1000 Running with 100*40 (== 4000) tasks. Time: 8.649 which is a pretty convincing result. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20211010150910.2911495-4-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kvm/vgic/vgic-v3.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67ed..9412a645a1c0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1152,6 +1152,7 @@ #define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_TDIR (1 << 14) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) @@ -1184,6 +1185,8 @@ #define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) #define ICH_VTR_A3V_SHIFT 21 #define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) +#define ICH_VTR_TDS_SHIFT 19 +#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) #define ARM64_FEATURE_FIELD_BITS 4 diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index ae59e2580bf5..467c22bbade6 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -15,6 +15,7 @@ static bool group0_trap; static bool group1_trap; static bool common_trap; +static bool dir_trap; static bool gicv4_enable; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) @@ -296,6 +297,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; if (common_trap) vgic_v3->vgic_hcr |= ICH_HCR_TC; + if (dir_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TDIR; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -676,14 +679,18 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group0_trap = true; group1_trap = true; - common_trap = true; + if (ich_vtr_el2 & ICH_VTR_TDS_MASK) + dir_trap = true; + else + common_trap = true; } - if (group0_trap || group1_trap || common_trap) { - kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + if (group0_trap || group1_trap || common_trap | dir_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", group0_trap ? "G0" : "", group1_trap ? "G1" : "", - common_trap ? "C" : ""); + common_trap ? "C" : "", + dir_trap ? "D" : ""); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3-58-ga151 From f87ab682722299cddf8cf5f7bc17053d70300ee0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:09 +0100 Subject: KVM: arm64: vgic-v3: Don't advertise ICC_CTLR_EL1.SEIS Since we are trapping all sysreg accesses when ICH_VTR_EL2.SEIS is set, and that we never deliver an SError when emulating any of the GICv3 sysregs, don't advertise ICC_CTLR_EL1.SEIS. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-5-maz@kernel.org --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 39f8f7f9227c..b3b50de496a3 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -987,8 +987,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; - /* SEIS */ - val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ -- cgit v1.2.3-58-ga151 From 9d449c71bd8f74282e84213c8f0b8328293ab0a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:10 +0100 Subject: KVM: arm64: vgic-v3: Align emulated cpuif LPI state machine with the pseudocode Having realised that a virtual LPI does transition through an active state that does not exist on bare metal, align the CPU interface emulation with the behaviour specified in the architecture pseudocode. The LPIs now transition to active on IAR read, and to inactive on EOI write. Special care is taken not to increment the EOIcount for an LPI that isn't present in the LRs. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-6-maz@kernel.org --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index b3b50de496a3..20db2f281cf2 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -695,9 +695,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) goto spurious; lr_val &= ~ICH_LR_STATE; - /* No active state for LPIs */ - if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) - lr_val |= ICH_LR_ACTIVE_BIT; + lr_val |= ICH_LR_ACTIVE_BIT; __gic_v3_set_lr(lr_val, lr); __vgic_v3_set_active_priority(lr_prio, vmcr, grp); vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); @@ -764,20 +762,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) /* Drop priority in any case */ act_prio = __vgic_v3_clear_highest_active_priority(); - /* If EOIing an LPI, no deactivate to be performed */ - if (vid >= VGIC_MIN_LPI) - return; - - /* EOImode == 1, nothing to be done here */ - if (vmcr & ICH_VMCR_EOIM_MASK) - return; - lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); if (lr == -1) { - __vgic_v3_bump_eoicount(); + /* Do not bump EOIcount for LPIs that aren't in the LRs */ + if (!(vid >= VGIC_MIN_LPI)) + __vgic_v3_bump_eoicount(); return; } + /* EOImode == 1 and not an LPI, nothing to be done here */ + if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI)) + return; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; /* If priorities or group do not match, the guest has fscked-up. */ -- cgit v1.2.3-58-ga151 From 3ef231670b9e9001316a426e794b2c74b8f6b4f6 Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 7 Sep 2021 20:31:11 +0800 Subject: KVM: arm64: vgic: Add memcg accounting to vgic allocations Inspired by commit 254272ce6505 ("kvm: x86: Add memcg accounting to KVM allocations"), it would be better to make arm64 vgic consistent with common kvm codes. The memory allocations of VM scope should be charged into VM process cgroup, hence change GFP_KERNEL to GFP_KERNEL_ACCOUNT. There remain a few cases since these allocations are global, not in VM scope. Signed-off-by: Jia He Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210907123112.10232-2-justin.he@arm.com --- arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-irqfd.c | 2 +- arch/arm64/kvm/vgic/vgic-its.c | 14 +++++++------- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- arch/arm64/kvm/vgic/vgic-v4.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 340c51d87677..0a06d0648970 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -134,7 +134,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; - dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); if (!dist->spis) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 79f8899b234c..475059bacedf 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -139,7 +139,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) u32 nr = dist->nr_spis; int i, ret; - entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT); if (!entries) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 61728c543eb9..b99f47103056 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -48,7 +48,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (irq) return irq; - irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); if (!irq) return ERR_PTR(-ENOMEM); @@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) * we must be careful not to overrun the array. */ irq_count = READ_ONCE(dist->lpi_list_count); - intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); if (!intids) return -ENOMEM; @@ -985,7 +985,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its, if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) return E_ITS_MAPC_COLLECTION_OOR; - collection = kzalloc(sizeof(*collection), GFP_KERNEL); + collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT); if (!collection) return -ENOMEM; @@ -1029,7 +1029,7 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, { struct its_ite *ite; - ite = kzalloc(sizeof(*ite), GFP_KERNEL); + ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT); if (!ite) return ERR_PTR(-ENOMEM); @@ -1150,7 +1150,7 @@ static struct its_device *vgic_its_alloc_device(struct vgic_its *its, { struct its_device *device; - device = kzalloc(sizeof(*device), GFP_KERNEL); + device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT); if (!device) return ERR_PTR(-ENOMEM); @@ -1847,7 +1847,7 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm) struct vgic_translation_cache_entry *cte; /* An allocation failure is not fatal */ - cte = kzalloc(sizeof(*cte), GFP_KERNEL); + cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT); if (WARN_ON(!cte)) break; @@ -1888,7 +1888,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) return -ENODEV; - its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT); if (!its) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a09cdc0b953c..d3fd8a6c0c9a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -834,7 +834,7 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, if (vgic_v3_rdist_overlap(kvm, base, size)) return -EINVAL; - rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT); if (!rdreg) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index c1845d8f5f7e..772dd15a22c7 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -246,7 +246,7 @@ int vgic_v4_init(struct kvm *kvm) nr_vcpus = atomic_read(&kvm->online_vcpus); dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!dist->its_vm.vpes) return -ENOMEM; -- cgit v1.2.3-58-ga151 From 115bae923ac8bb29ee635e0ed6b4d5a3eec9371e Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 7 Sep 2021 20:31:12 +0800 Subject: KVM: arm64: Add memcg accounting to KVM allocations Inspired by commit 254272ce6505 ("kvm: x86: Add memcg accounting to KVM allocations"), it would be better to make arm64 KVM consistent with common kvm codes. The memory allocations of VM scope should be charged into VM process cgroup, hence change GFP_KERNEL to GFP_KERNEL_ACCOUNT. There remain a few cases since these allocations are global, not in VM scope. Signed-off-by: Jia He Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210907123112.10232-3-justin.he@arm.com --- arch/arm64/kvm/arm.c | 6 ++++-- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/pmu-emul.c | 2 +- arch/arm64/kvm/reset.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..e3d903e1b7da 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -291,10 +291,12 @@ long kvm_arch_dev_ioctl(struct file *filp, struct kvm *kvm_arch_alloc_vm(void) { + size_t sz = sizeof(struct kvm); + if (!has_vhe()) - return kzalloc(sizeof(struct kvm), GFP_KERNEL); + return kzalloc(sz, GFP_KERNEL_ACCOUNT); - return vzalloc(sizeof(struct kvm)); + return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO); } void kvm_arch_free_vm(struct kvm *kvm) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a94a7ca48f2..da10996dcdf1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -512,7 +512,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) return -EINVAL; } - pgt = kzalloc(sizeof(*pgt), GFP_KERNEL); + pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT); if (!pgt) return -ENOMEM; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2af3c37445e0..a5e4bbf5e68f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -978,7 +978,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) mutex_lock(&vcpu->kvm->lock); if (!vcpu->kvm->arch.pmu_filter) { - vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL); + vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); if (!vcpu->kvm->arch.pmu_filter) { mutex_unlock(&vcpu->kvm->lock); return -ENOMEM; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ce36b0a3343..acad22ebac12 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -106,7 +106,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) vl > SVE_VL_ARCH_MAX)) return -EIO; - buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL); + buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT); if (!buf) return -ENOMEM; -- cgit v1.2.3-58-ga151 From 8a049862c38f0c78b0e01ab5d36db1bffc832675 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:36 +0100 Subject: KVM: arm64: Fix early exit ptrauth handling The previous rework of the early exit code to provide an EC-based decoding tree missed the fact that we have two trap paths for ptrauth: the instructions (EC_PAC) and the sysregs (EC_SYS64). Rework the handlers to call the ptrauth handling code on both paths. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-2-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 481399bf9b94..4126926c3e06 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -282,14 +282,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) static inline bool esr_is_ptrauth_trap(u32 esr) { - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - switch (esr_sys64_to_sysreg(esr)) { case SYS_APIAKEYLO_EL1: case SYS_APIAKEYHI_EL1: @@ -323,8 +315,7 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) struct kvm_cpu_context *ctxt; u64 val; - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + if (!vcpu_has_ptrauth(vcpu)) return false; ctxt = this_cpu_ptr(&kvm_hyp_ctxt); @@ -353,6 +344,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) __vgic_v3_perform_cpuif_access(vcpu) == 1) return true; + if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return kvm_hyp_handle_ptrauth(vcpu, exit_code); + return false; } -- cgit v1.2.3-58-ga151 From ce75916749b8cb5ec795f1157a5c426f6765a48c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:37 +0100 Subject: KVM: arm64: pkvm: Use a single function to expose all id-regs Rather than exposing a whole set of helper functions to retrieve individual ID registers, use the existing decoding tree and expose a single helper instead. This allow a number of functions to be made static, and we now have a single entry point to maintain. Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-3-maz@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/sys_regs.h | 14 +---------- arch/arm64/kvm/hyp/nvhe/pkvm.c | 10 ++++---- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 37 ++++++++++++++++-------------- 3 files changed, 26 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h index 3288128738aa..8adc13227b1a 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h +++ b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h @@ -9,19 +9,7 @@ #include -u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu); - +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); int kvm_check_pvm_sysreg_table(void); void inject_undef64(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 633547cc1033..62377fa8a4cb 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -15,7 +15,7 @@ */ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64pfr0(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1); u64 hcr_set = HCR_RW; u64 hcr_clear = 0; u64 cptr_set = 0; @@ -62,7 +62,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64pfr1(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1); u64 hcr_set = 0; u64 hcr_clear = 0; @@ -81,7 +81,7 @@ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64dfr0(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1); u64 mdcr_set = 0; u64 mdcr_clear = 0; u64 cptr_set = 0; @@ -125,7 +125,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64mmfr0(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1); u64 mdcr_set = 0; /* Trap Debug Communications Channel registers */ @@ -140,7 +140,7 @@ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64mmfr1(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1); u64 hcr_set = 0; /* Trap LOR */ diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 6bde2dc5205c..f125d6a52880 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -82,7 +82,7 @@ static u64 get_restricted_features_unsigned(u64 sys_reg_val, * based on allowed features, system features, and KVM support. */ -u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) { const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); u64 set_mask = 0; @@ -103,7 +103,7 @@ u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; } -u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) { const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; @@ -114,7 +114,7 @@ u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) return id_aa64pfr1_el1_sys_val & allow_mask; } -u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) { /* * No support for Scalable Vectors, therefore, hyp has no sanitized @@ -124,7 +124,7 @@ u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) { /* * No support for debug, including breakpoints, and watchpoints, @@ -134,7 +134,7 @@ u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) { /* * No support for debug, therefore, hyp has no sanitized copy of the @@ -144,7 +144,7 @@ u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) { /* * No support for implementation defined features, therefore, hyp has no @@ -154,7 +154,7 @@ u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) { /* * No support for implementation defined features, therefore, hyp has no @@ -164,12 +164,12 @@ u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) { return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; } -u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) { u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; @@ -182,7 +182,7 @@ u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) return id_aa64isar1_el1_sys_val & allow_mask; } -u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) { u64 set_mask; @@ -192,22 +192,19 @@ u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; } -u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) { return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; } -u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) { return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; } -/* Read a sanitized cpufeature ID register by its sys_reg_desc. */ -static u64 read_id_reg(const struct kvm_vcpu *vcpu, - struct sys_reg_desc const *r) +/* Read a sanitized cpufeature ID register by its encoding */ +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) { - u32 id = reg_to_encoding(r); - switch (id) { case SYS_ID_AA64PFR0_EL1: return get_pvm_id_aa64pfr0(vcpu); @@ -245,6 +242,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return 0; } +static u64 read_id_reg(const struct kvm_vcpu *vcpu, + struct sys_reg_desc const *r) +{ + return pvm_read_id_reg(vcpu, reg_to_encoding(r)); +} + /* * Accessor for AArch32 feature id registers. * -- cgit v1.2.3-58-ga151 From 8ffb41888334c1247bd9b4d6ff6c092a90e8d0b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:38 +0100 Subject: KVM: arm64: pkvm: Make the ERR/ERX*_EL1 registers RAZ/WI The ERR*/ERX* registers should be handled as RAZ/WI, and there should be no need to involve EL1 for that. Add a helper that handles such registers, and repaint the sysreg table to declare these registers as RAZ/WI. Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-4-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index f125d6a52880..042a1c0be7e0 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -248,6 +248,16 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return pvm_read_id_reg(vcpu, reg_to_encoding(r)); } +/* Handler to RAZ/WI sysregs */ +static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!p->is_write) + p->regval = 0; + + return true; +} + /* * Accessor for AArch32 feature id registers. * @@ -270,9 +280,7 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY); - /* Use 0 for architecturally "unknown" values. */ - p->regval = 0; - return true; + return pvm_access_raz_wi(vcpu, p, r); } /* @@ -301,6 +309,9 @@ static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, /* Mark the specified system register as an AArch64 feature id register. */ #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } +/* Mark the specified system register as Read-As-Zero/Write-Ignored */ +#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } + /* Mark the specified system register as not being handled in hyp. */ #define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL } @@ -388,14 +399,14 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_AFSR1_EL1), HOST_HANDLED(SYS_ESR_EL1), - HOST_HANDLED(SYS_ERRIDR_EL1), - HOST_HANDLED(SYS_ERRSELR_EL1), - HOST_HANDLED(SYS_ERXFR_EL1), - HOST_HANDLED(SYS_ERXCTLR_EL1), - HOST_HANDLED(SYS_ERXSTATUS_EL1), - HOST_HANDLED(SYS_ERXADDR_EL1), - HOST_HANDLED(SYS_ERXMISC0_EL1), - HOST_HANDLED(SYS_ERXMISC1_EL1), + RAZ_WI(SYS_ERRIDR_EL1), + RAZ_WI(SYS_ERRSELR_EL1), + RAZ_WI(SYS_ERXFR_EL1), + RAZ_WI(SYS_ERXCTLR_EL1), + RAZ_WI(SYS_ERXSTATUS_EL1), + RAZ_WI(SYS_ERXADDR_EL1), + RAZ_WI(SYS_ERXMISC0_EL1), + RAZ_WI(SYS_ERXMISC1_EL1), HOST_HANDLED(SYS_TFSR_EL1), HOST_HANDLED(SYS_TFSRE0_EL1), -- cgit v1.2.3-58-ga151 From 3c90cb15e2e66bcc526d25133747b2af747f6cd8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:39 +0100 Subject: KVM: arm64: pkvm: Drop AArch32-specific registers All the SYS_*32_EL2 registers are AArch32-specific. Since we forbid AArch32, there is no need to handle those in any way. Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-5-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 042a1c0be7e0..e2b3a9e167da 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -452,10 +452,6 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_CNTP_CVAL_EL0), /* Performance Monitoring Registers are restricted. */ - - HOST_HANDLED(SYS_DACR32_EL2), - HOST_HANDLED(SYS_IFSR32_EL2), - HOST_HANDLED(SYS_FPEXC32_EL2), }; /* -- cgit v1.2.3-58-ga151 From f3d5ccabab20c1be5838831f460f320a12e5e2c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:40 +0100 Subject: KVM: arm64: pkvm: Drop sysregs that should never be routed to the host A bunch of system registers (most of them MM related) should never trap to the host under any circumstance. Keep them close to our chest. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-6-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 50 -------------------------------------- 1 file changed, 50 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index e2b3a9e167da..eb4ee2589316 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -371,34 +371,8 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { AARCH64(SYS_ID_AA64MMFR1_EL1), AARCH64(SYS_ID_AA64MMFR2_EL1), - HOST_HANDLED(SYS_SCTLR_EL1), - HOST_HANDLED(SYS_ACTLR_EL1), - HOST_HANDLED(SYS_CPACR_EL1), - - HOST_HANDLED(SYS_RGSR_EL1), - HOST_HANDLED(SYS_GCR_EL1), - /* Scalable Vector Registers are restricted. */ - HOST_HANDLED(SYS_TTBR0_EL1), - HOST_HANDLED(SYS_TTBR1_EL1), - HOST_HANDLED(SYS_TCR_EL1), - - HOST_HANDLED(SYS_APIAKEYLO_EL1), - HOST_HANDLED(SYS_APIAKEYHI_EL1), - HOST_HANDLED(SYS_APIBKEYLO_EL1), - HOST_HANDLED(SYS_APIBKEYHI_EL1), - HOST_HANDLED(SYS_APDAKEYLO_EL1), - HOST_HANDLED(SYS_APDAKEYHI_EL1), - HOST_HANDLED(SYS_APDBKEYLO_EL1), - HOST_HANDLED(SYS_APDBKEYHI_EL1), - HOST_HANDLED(SYS_APGAKEYLO_EL1), - HOST_HANDLED(SYS_APGAKEYHI_EL1), - - HOST_HANDLED(SYS_AFSR0_EL1), - HOST_HANDLED(SYS_AFSR1_EL1), - HOST_HANDLED(SYS_ESR_EL1), - RAZ_WI(SYS_ERRIDR_EL1), RAZ_WI(SYS_ERRSELR_EL1), RAZ_WI(SYS_ERXFR_EL1), @@ -408,31 +382,12 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { RAZ_WI(SYS_ERXMISC0_EL1), RAZ_WI(SYS_ERXMISC1_EL1), - HOST_HANDLED(SYS_TFSR_EL1), - HOST_HANDLED(SYS_TFSRE0_EL1), - - HOST_HANDLED(SYS_FAR_EL1), - HOST_HANDLED(SYS_PAR_EL1), - /* Performance Monitoring Registers are restricted. */ - HOST_HANDLED(SYS_MAIR_EL1), - HOST_HANDLED(SYS_AMAIR_EL1), - /* Limited Ordering Regions Registers are restricted. */ - HOST_HANDLED(SYS_VBAR_EL1), - HOST_HANDLED(SYS_DISR_EL1), - /* GIC CPU Interface registers are restricted. */ - HOST_HANDLED(SYS_CONTEXTIDR_EL1), - HOST_HANDLED(SYS_TPIDR_EL1), - - HOST_HANDLED(SYS_SCXTNUM_EL1), - - HOST_HANDLED(SYS_CNTKCTL_EL1), - HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), HOST_HANDLED(SYS_CSSELR_EL1), @@ -440,11 +395,6 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Performance Monitoring Registers are restricted. */ - HOST_HANDLED(SYS_TPIDR_EL0), - HOST_HANDLED(SYS_TPIDRRO_EL0), - - HOST_HANDLED(SYS_SCXTNUM_EL0), - /* Activity Monitoring Registers are restricted. */ HOST_HANDLED(SYS_CNTP_TVAL_EL0), -- cgit v1.2.3-58-ga151 From cbca19738472be8156d854663ed724b01255c932 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:41 +0100 Subject: KVM: arm64: pkvm: Handle GICv3 traps as required Forward accesses to the ICV_*SGI*_EL1 registers to EL1, and emulate ICV_SRE_EL1 by returning a fixed value. This should be enough to support GICv3 in a protected guest. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-7-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index eb4ee2589316..a341bd8ef252 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -4,6 +4,8 @@ * Author: Fuad Tabba */ +#include + #include #include #include @@ -303,6 +305,17 @@ static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, return true; } +static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + /* pVMs only support GICv3. 'nuf said. */ + if (!p->is_write) + p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE; + + return true; +} + /* Mark the specified system register as an AArch32 feature id register. */ #define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 } @@ -386,7 +399,10 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Limited Ordering Regions Registers are restricted. */ - /* GIC CPU Interface registers are restricted. */ + HOST_HANDLED(SYS_ICC_SGI1R_EL1), + HOST_HANDLED(SYS_ICC_ASGI1R_EL1), + HOST_HANDLED(SYS_ICC_SGI0R_EL1), + { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, }, HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), -- cgit v1.2.3-58-ga151 From 271b7286058da636ab6f5f47722e098ca3a0478b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:42 +0100 Subject: KVM: arm64: pkvm: Preserve pending SError on exit from AArch32 Don't drop a potential SError when a guest gets caught red-handed running AArch32 code. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-8-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/switch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f25b6353a598..481c365ef144 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -256,7 +256,8 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code) * protected VMs. */ vcpu->arch.target = -1; - *exit_code = ARM_EXCEPTION_IL; + *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); + *exit_code |= ARM_EXCEPTION_IL; return false; } -- cgit v1.2.3-58-ga151 From 3061725d162cad0589b012fc6413c9dd0da8f02a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:43 +0100 Subject: KVM: arm64: pkvm: Consolidate include files kvm_fixed_config.h is pkvm specific, and would be better placed near its users. At the same time, include/nvhe/sys_regs.h is now almost empty. Merge the two into arch/arm64/kvm/hyp/include/nvhe/fixed_config.h. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-9-maz@kernel.org --- arch/arm64/include/asm/kvm_fixed_config.h | 195 ------------------------ arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 200 +++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/sys_regs.h | 17 --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 +- arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 3 +- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 3 +- 7 files changed, 204 insertions(+), 219 deletions(-) delete mode 100644 arch/arm64/include/asm/kvm_fixed_config.h create mode 100644 arch/arm64/kvm/hyp/include/nvhe/fixed_config.h delete mode 100644 arch/arm64/kvm/hyp/include/nvhe/sys_regs.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_fixed_config.h b/arch/arm64/include/asm/kvm_fixed_config.h deleted file mode 100644 index 0ed06923f7e9..000000000000 --- a/arch/arm64/include/asm/kvm_fixed_config.h +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2021 Google LLC - * Author: Fuad Tabba - */ - -#ifndef __ARM64_KVM_FIXED_CONFIG_H__ -#define __ARM64_KVM_FIXED_CONFIG_H__ - -#include - -/* - * This file contains definitions for features to be allowed or restricted for - * guest virtual machines, depending on the mode KVM is running in and on the - * type of guest that is running. - * - * The ALLOW masks represent a bitmask of feature fields that are allowed - * without any restrictions as long as they are supported by the system. - * - * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for - * features that are restricted to support at most the specified feature. - * - * If a feature field is not present in either, than it is not supported. - * - * The approach taken for protected VMs is to allow features that are: - * - Needed by common Linux distributions (e.g., floating point) - * - Trivial to support, e.g., supporting the feature does not introduce or - * require tracking of additional state in KVM - * - Cannot be trapped or prevent the guest from using anyway - */ - -/* - * Allow for protected VMs: - * - Floating-point and Advanced SIMD - * - Data Independent Timing - */ -#define PVM_ID_AA64PFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ - ) - -/* - * Restrict to the following *unsigned* features for protected VMs: - * - AArch64 guests only (no support for AArch32 guests): - * AArch32 adds complexity in trap handling, emulation, condition codes, - * etc... - * - RAS (v1) - * Supported by KVM - */ -#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ - ) - -/* - * Allow for protected VMs: - * - Branch Target Identification - * - Speculative Store Bypassing - */ -#define PVM_ID_AA64PFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ - ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ - ) - -/* - * Allow for protected VMs: - * - Mixed-endian - * - Distinction between Secure and Non-secure Memory - * - Mixed-endian at EL0 only - * - Non-context synchronizing exception entry and exit - */ -#define PVM_ID_AA64MMFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ - ) - -/* - * Restrict to the following *unsigned* features for protected VMs: - * - 40-bit IPA - * - 16-bit ASID - */ -#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ - ) - -/* - * Allow for protected VMs: - * - Hardware translation table updates to Access flag and Dirty state - * - Number of VMID bits from CPU - * - Hierarchical Permission Disables - * - Privileged Access Never - * - SError interrupt exceptions from speculative reads - * - Enhanced Translation Synchronization - */ -#define PVM_ID_AA64MMFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ - ) - -/* - * Allow for protected VMs: - * - Common not Private translations - * - User Access Override - * - IESB bit in the SCTLR_ELx registers - * - Unaligned single-copy atomicity and atomic functions - * - ESR_ELx.EC value on an exception by read access to feature ID space - * - TTL field in address operations. - * - Break-before-make sequences when changing translation block size - * - E0PDx mechanism - */ -#define PVM_ID_AA64MMFR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ - ) - -/* - * No support for Scalable Vectors for protected VMs: - * Requires additional support from KVM, e.g., context-switching and - * trapping at EL2 - */ -#define PVM_ID_AA64ZFR0_ALLOW (0ULL) - -/* - * No support for debug, including breakpoints, and watchpoints for protected - * VMs: - * The Arm architecture mandates support for at least the Armv8 debug - * architecture, which would include at least 2 hardware breakpoints and - * watchpoints. Providing that support to protected guests adds - * considerable state and complexity. Therefore, the reserved value of 0 is - * used for debug-related fields. - */ -#define PVM_ID_AA64DFR0_ALLOW (0ULL) -#define PVM_ID_AA64DFR1_ALLOW (0ULL) - -/* - * No support for implementation defined features. - */ -#define PVM_ID_AA64AFR0_ALLOW (0ULL) -#define PVM_ID_AA64AFR1_ALLOW (0ULL) - -/* - * No restrictions on instructions implemented in AArch64. - */ -#define PVM_ID_AA64ISAR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ - ) - -#define PVM_ID_AA64ISAR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ - ) - -#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h new file mode 100644 index 000000000000..747fc79ae784 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_FIXED_CONFIG_H__ +#define __ARM64_KVM_FIXED_CONFIG_H__ + +#include + +/* + * This file contains definitions for features to be allowed or restricted for + * guest virtual machines, depending on the mode KVM is running in and on the + * type of guest that is running. + * + * The ALLOW masks represent a bitmask of feature fields that are allowed + * without any restrictions as long as they are supported by the system. + * + * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for + * features that are restricted to support at most the specified feature. + * + * If a feature field is not present in either, than it is not supported. + * + * The approach taken for protected VMs is to allow features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ + +/* + * Allow for protected VMs: + * - Floating-point and Advanced SIMD + * - Data Independent Timing + */ +#define PVM_ID_AA64PFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - AArch64 guests only (no support for AArch32 guests): + * AArch32 adds complexity in trap handling, emulation, condition codes, + * etc... + * - RAS (v1) + * Supported by KVM + */ +#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ + ) + +/* + * Allow for protected VMs: + * - Branch Target Identification + * - Speculative Store Bypassing + */ +#define PVM_ID_AA64PFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ + ) + +/* + * Allow for protected VMs: + * - Mixed-endian + * - Distinction between Secure and Non-secure Memory + * - Mixed-endian at EL0 only + * - Non-context synchronizing exception entry and exit + */ +#define PVM_ID_AA64MMFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - 40-bit IPA + * - 16-bit ASID + */ +#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ + ) + +/* + * Allow for protected VMs: + * - Hardware translation table updates to Access flag and Dirty state + * - Number of VMID bits from CPU + * - Hierarchical Permission Disables + * - Privileged Access Never + * - SError interrupt exceptions from speculative reads + * - Enhanced Translation Synchronization + */ +#define PVM_ID_AA64MMFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ + ) + +/* + * Allow for protected VMs: + * - Common not Private translations + * - User Access Override + * - IESB bit in the SCTLR_ELx registers + * - Unaligned single-copy atomicity and atomic functions + * - ESR_ELx.EC value on an exception by read access to feature ID space + * - TTL field in address operations. + * - Break-before-make sequences when changing translation block size + * - E0PDx mechanism + */ +#define PVM_ID_AA64MMFR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ + ) + +/* + * No support for Scalable Vectors for protected VMs: + * Requires additional support from KVM, e.g., context-switching and + * trapping at EL2 + */ +#define PVM_ID_AA64ZFR0_ALLOW (0ULL) + +/* + * No support for debug, including breakpoints, and watchpoints for protected + * VMs: + * The Arm architecture mandates support for at least the Armv8 debug + * architecture, which would include at least 2 hardware breakpoints and + * watchpoints. Providing that support to protected guests adds + * considerable state and complexity. Therefore, the reserved value of 0 is + * used for debug-related fields. + */ +#define PVM_ID_AA64DFR0_ALLOW (0ULL) +#define PVM_ID_AA64DFR1_ALLOW (0ULL) + +/* + * No support for implementation defined features. + */ +#define PVM_ID_AA64AFR0_ALLOW (0ULL) +#define PVM_ID_AA64AFR1_ALLOW (0ULL) + +/* + * No restrictions on instructions implemented in AArch64. + */ +#define PVM_ID_AA64ISAR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ + ) + +#define PVM_ID_AA64ISAR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ) + +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +int kvm_check_pvm_sysreg_table(void); +void inject_undef64(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h deleted file mode 100644 index 8adc13227b1a..000000000000 --- a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2021 Google LLC - * Author: Fuad Tabba - */ - -#ifndef __ARM64_KVM_NVHE_SYS_REGS_H__ -#define __ARM64_KVM_NVHE_SYS_REGS_H__ - -#include - -u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); -bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); -int kvm_check_pvm_sysreg_table(void); -void inject_undef64(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_NVHE_SYS_REGS_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 62377fa8a4cb..99c8d8b73e70 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -6,8 +6,7 @@ #include #include -#include -#include +#include #include /* diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index c85ff64e63f2..862c7b514e20 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -10,11 +10,11 @@ #include #include +#include #include #include #include #include -#include #include struct hyp_pool hpool; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 481c365ef144..317dba6a018d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -28,8 +27,8 @@ #include #include +#include #include -#include /* Non-VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index a341bd8ef252..052f885e65b2 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -7,12 +7,11 @@ #include #include -#include #include #include -#include +#include #include "../../sys_regs.h" -- cgit v1.2.3-58-ga151 From 746bdeadc53b0d58fddea6442591f5ec3eeabe7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:44 +0100 Subject: KVM: arm64: pkvm: Move kvm_handle_pvm_restricted around Place kvm_handle_pvm_restricted() next to its little friends such as kvm_handle_pvm_sysreg(). This allows to make inject_undef64() static. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-10-maz@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 12 ------------ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 14 +++++++++++++- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 747fc79ae784..eea1f6a53723 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -194,7 +194,7 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); int kvm_check_pvm_sysreg_table(void); -void inject_undef64(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 317dba6a018d..be6889e33b2b 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -159,18 +159,6 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } -/** - * Handler for protected VM restricted exceptions. - * - * Inject an undefined exception into the guest and return true to indicate that - * the hypervisor has handled the exit, and control should go back to the guest. - */ -static bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - inject_undef64(vcpu); - return true; -} - /** * Handler for protected VM MSR, MRS or System instruction execution in AArch64. * diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 052f885e65b2..3787ee6fb1a2 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -30,7 +30,7 @@ u64 id_aa64mmfr2_el1_sys_val; * Inject an unknown/undefined exception to an AArch64 guest while most of its * sysregs are live. */ -void inject_undef64(struct kvm_vcpu *vcpu) +static void inject_undef64(struct kvm_vcpu *vcpu) { u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); @@ -473,3 +473,15 @@ bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } + +/** + * Handler for protected VM restricted exceptions. + * + * Inject an undefined exception into the guest and return true to indicate that + * the hypervisor has handled the exit, and control should go back to the guest. + */ +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + inject_undef64(vcpu); + return true; +} -- cgit v1.2.3-58-ga151 From 0c7639cc838263b6e38b3af76755d574f15cdf41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:45 +0100 Subject: KVM: arm64: pkvm: Pass vpcu instead of kvm to kvm_get_exit_handler_array() Passing a VM pointer around is odd, and results in extra work on VHE. Follow the rest of the design that uses the vcpu instead, and let the nVHE code look into the struct kvm as required. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-11-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 4126926c3e06..c6e98c7e918b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -397,7 +397,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm); +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); /* * Allow the hypervisor to handle the exit with an exit handler if it has one. @@ -407,7 +407,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm); */ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(kern_hyp_va(vcpu->kvm)); + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); exit_handler_fn fn; fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index be6889e33b2b..50c7d48e0fa0 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -211,9 +211,9 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kvm))) + if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) return pvm_exit_handlers; return hyp_exit_handlers; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index f6fb97accf65..5a2cb5d9bc4b 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -107,7 +107,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { return hyp_exit_handlers; } -- cgit v1.2.3-58-ga151 From 07305590114af81817148d181f1eb0af294e40d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:46 +0100 Subject: KVM: arm64: pkvm: Give priority to standard traps over pvm handling Checking for pvm handling first means that we cannot handle ptrauth traps or apply any of the workarounds (GICv3 or TX2 #219). Flip the order around. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-12-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/switch.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 50c7d48e0fa0..c0e3fed26d93 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -167,10 +167,13 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) */ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { - if (kvm_handle_pvm_sysreg(vcpu, exit_code)) - return true; - - return kvm_hyp_handle_sysreg(vcpu, exit_code); + /* + * Make sure we handle the exit for workarounds and ptrauth + * before the pKVM handling, as the latter could decide to + * UNDEF. + */ + return (kvm_hyp_handle_sysreg(vcpu, exit_code) || + kvm_handle_pvm_sysreg(vcpu, exit_code)); } /** -- cgit v1.2.3-58-ga151