diff options
author | Huacai Chen <chenhuacai@loongson.cn> | 2023-11-01 10:55:00 +0800 |
---|---|---|
committer | Huacai Chen <chenhuacai@loongson.cn> | 2023-11-01 10:55:00 +0800 |
commit | a6bdc082ad1c91d389a6ba0c7a1945818f732114 (patch) | |
tree | fa630701d5d2a8bc1ab8c4abf759663bbb81aeeb /arch/arm | |
parent | ffc253263a1375a65fa6c9f62a893e9767fbebfa (diff) | |
parent | 99c9991f4e5d77328187187d0c921a3b62bfa998 (diff) |
Merge 'bpf-next 2023-10-16' into loongarch-next
LoongArch architecture changes for 6.7 (BPF CPU v4 support) depend on
the bpf changes to fix conflictions in selftests and work, so merge them
to create a base.
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/net/bpf_jit_32.c | 280 | ||||
-rw-r--r-- | arch/arm/net/bpf_jit_32.h | 4 |
2 files changed, 264 insertions, 20 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6a1c9fca5260..1d672457d02f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -2,6 +2,7 @@ /* * Just-In-Time compiler for eBPF filters on 32bit ARM * + * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com> * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> */ @@ -15,6 +16,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/if_vlan.h> +#include <linux/math64.h> #include <asm/cacheflush.h> #include <asm/hwcap.h> @@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor) return dividend % divisor; } +static s32 jit_sdiv32(s32 dividend, s32 divisor) +{ + return dividend / divisor; +} + +static s32 jit_smod32(s32 dividend, s32 divisor) +{ + return dividend % divisor; +} + +/* Wrappers for 64-bit div/mod */ +static u64 jit_udiv64(u64 dividend, u64 divisor) +{ + return div64_u64(dividend, divisor); +} + +static u64 jit_mod64(u64 dividend, u64 divisor) +{ + u64 rem; + + div64_u64_rem(dividend, divisor, &rem); + return rem; +} + +static s64 jit_sdiv64(s64 dividend, s64 divisor) +{ + return div64_s64(dividend, divisor); +} + +static s64 jit_smod64(s64 dividend, s64 divisor) +{ + u64 q; + + q = div64_s64(dividend, divisor); + + return dividend - q * divisor; +} + static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { inst |= (cond << 28); @@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) +#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off) +#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off) + #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) @@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) return to - from - 2; } -static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign) { const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; + u32 dst; #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { - if (op == BPF_DIV) - emit(ARM_UDIV(rd, rm, rn), ctx); - else { - emit(ARM_UDIV(ARM_IP, rm, rn), ctx); + if (op == BPF_DIV) { + emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx); + } else { + emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx); emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); } return; @@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); /* Call appropriate function */ - emit_mov_i(ARM_IP, op == BPF_DIV ? - (u32)jit_udiv32 : (u32)jit_mod32, ctx); + if (sign) { + if (op == BPF_DIV) + dst = (u32)jit_sdiv32; + else + dst = (u32)jit_smod32; + } else { + if (op == BPF_DIV) + dst = (u32)jit_udiv32; + else + dst = (u32)jit_mod32; + } + + emit_mov_i(ARM_IP, dst, ctx); emit_blx_r(ARM_IP, ctx); /* Restore caller-saved registers from stack */ @@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } +static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx, + u8 op, u8 sign) +{ + u32 dst; + + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK), ctx); + + /* + * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in + * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack, + * we can recover them later after returning from the function call. + */ + if (rm[1] != ARM_R0 || rn[1] != ARM_R2) { + /* + * Move Rm to {R1, R0} if it is not already there. + */ + if (rm[1] != ARM_R0) { + if (rn[1] == ARM_R0) + emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx); + emit(ARM_MOV_R(ARM_R1, rm[0]), ctx); + emit(ARM_MOV_R(ARM_R0, rm[1]), ctx); + if (rn[1] == ARM_R0) { + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); + goto cont; + } + } + /* + * Move Rn to {R3, R2} if it is not already there. + */ + if (rn[1] != ARM_R2) { + emit(ARM_MOV_R(ARM_R3, rn[0]), ctx); + emit(ARM_MOV_R(ARM_R2, rn[1]), ctx); + } + } + +cont: + + /* Call appropriate function */ + if (sign) { + if (op == BPF_DIV) + dst = (u32)jit_sdiv64; + else + dst = (u32)jit_smod64; + } else { + if (op == BPF_DIV) + dst = (u32)jit_udiv64; + else + dst = (u32)jit_mod64; + } + + emit_mov_i(ARM_IP, dst, ctx); + emit_blx_r(ARM_IP, ctx); + + /* Save return value */ + if (rd[1] != ARM_R0) { + emit(ARM_MOV_R(rd[0], ARM_R1), ctx); + emit(ARM_MOV_R(rd[1], ARM_R0), ctx); + } + + /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */ + if (rd[1] != ARM_R0 && rd[1] != ARM_R2) { + emit(ARM_POP(CALLER_MASK), ctx); + } else if (rd[1] != ARM_R0) { + emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx); + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); + } else { + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); + } +} + /* Is the translated BPF register on stack? */ static bool is_stacked(s8 reg) { @@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], } /* dst = src (4 bytes)*/ -static inline void emit_a32_mov_r(const s8 dst, const s8 src, +static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; s8 rt; rt = arm_bpf_get_reg32(src, tmp[0], ctx); + if (off && off != 32) { + emit(ARM_LSL_I(rt, rt, 32 - off), ctx); + emit(ARM_ASR_I(rt, rt, 32 - off), ctx); + } arm_bpf_put_reg32(dst, rt, ctx); } @@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { - emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_lo, src_lo, 0, ctx); if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ - emit_a32_mov_r(dst_lo, src_lo, ctx); - emit_a32_mov_r(dst_hi, src_hi, ctx); + emit_a32_mov_r(dst_lo, src_lo, 0, ctx); + emit_a32_mov_r(dst_hi, src_hi, 0, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; @@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], } } +/* dst = (signed)src */ +static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rt; + + rt = arm_bpf_get_reg64(dst, tmp, ctx); + + emit_a32_mov_r(dst_lo, src_lo, off, ctx); + if (!is64) { + if (!ctx->prog->aux->verifier_zext) + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); + } else { + emit(ARM_ASR_I(rt[0], rt[1], 31), ctx); + } +} + /* Shift operations */ static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { @@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size) return -off_max <= off && off <= off_max; } +static bool is_ldst_imm8(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + off_max = 0xff; + break; + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], s16 off, struct jit_ctx *ctx, const u8 sz){ @@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, arm_bpf_put_reg64(dst, rd, ctx); } +/* dst = *(signed size*)(src + off) */ +static inline void emit_ldsx_r(const s8 dst[], const s8 src, + s16 off, struct jit_ctx *ctx, const u8 sz){ + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + s8 rm = src; + int add_off; + + if (!is_ldst_imm8(off, sz)) { + /* + * offset does not fit in the load/store immediate, + * construct an ADD instruction to apply the offset. + */ + add_off = imm8m(off); + if (add_off > 0) { + emit(ARM_ADD_I(tmp[0], src, add_off), ctx); + rm = tmp[0]; + } else { + emit_a32_mov_i(tmp[0], off, ctx); + emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); + rm = tmp[0]; + } + off = 0; + } + + switch (sz) { + case BPF_B: + /* Load a Byte with sign extension*/ + emit(ARM_LDRSB_I(rd[1], rm, off), ctx); + break; + case BPF_H: + /* Load a HalfWord with sign extension*/ + emit(ARM_LDRSH_I(rd[1], rm, off), ctx); + break; + case BPF_W: + /* Load a Word*/ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + break; + } + /* Carry the sign extension to upper 32 bits */ + emit(ARM_ASR_I(rd[0], rd[1], 31), ctx); + arm_bpf_put_reg64(dst, rd, ctx); +} + /* Arithmatic Operation */ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, const u8 rn, struct jit_ctx *ctx, u8 op, @@ -1385,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_a32_mov_i(dst_hi, 0, ctx); break; } - emit_a32_mov_r64(is64, dst, src, ctx); + if (insn->off) + emit_a32_movsx_r64(is64, insn->off, dst, src, ctx); + else + emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: /* Sign-extend immediate value to destination reg */ @@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rt = src_lo; break; } - emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); + emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off); arm_bpf_put_reg32(dst_lo, rd_lo, ctx); if (!ctx->prog->aux->verifier_zext) emit_a32_mov_i(dst_hi, 0, ctx); @@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_X: - goto notyet; + rd = arm_bpf_get_reg64(dst, tmp2, ctx); + switch (BPF_SRC(code)) { + case BPF_X: + rs = arm_bpf_get_reg64(src, tmp, ctx); + break; + case BPF_K: + rs = tmp; + emit_a32_mov_se_i64(is64, rs, imm, ctx); + break; + } + emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off); + arm_bpf_put_reg64(dst, rd, ctx); + break; /* dst = dst << imm */ /* dst = dst >> imm */ /* dst = dst >> imm (signed) */ @@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* dst = htole(dst) */ /* dst = htobe(dst) */ - case BPF_ALU | BPF_END | BPF_FROM_LE: - case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ + case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */ + /* dst = bswap(dst) */ + case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ rd = arm_bpf_get_reg64(dst, tmp, ctx); - if (BPF_SRC(code) == BPF_FROM_LE) + if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64) goto emit_bswap_uxt; switch (imm) { case 16: @@ -1603,8 +1831,15 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: + /* LDSX: dst = *(signed size *)(src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); + if (BPF_MODE(insn->code) == BPF_MEMSX) + emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code)); + else + emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* speculation barrier */ case BPF_ST | BPF_NOSPEC: @@ -1761,10 +1996,15 @@ go_jmp: break; /* JMP OFF */ case BPF_JMP | BPF_JA: + case BPF_JMP32 | BPF_JA: { - if (off == 0) + if (BPF_CLASS(code) == BPF_JMP32 && imm != 0) + jmp_offset = bpf2a32_offset(i + imm, i, ctx); + else if (BPF_CLASS(code) == BPF_JMP && off != 0) + jmp_offset = bpf2a32_offset(i + off, i, ctx); + else break; - jmp_offset = bpf2a32_offset(i+off, i, ctx); + check_imm24(jmp_offset); emit(ARM_B(jmp_offset), ctx); break; diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index e0b593a1498d..438f0e1f91a0 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -79,9 +79,11 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 +#define ARM_INST_LDRSB_I 0x015000d0 #define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 +#define ARM_INST_LDRSH_I 0x015000f0 #define ARM_INST_LDR_I 0x05100000 #define ARM_INST_LDR_R 0x07900000 @@ -137,6 +139,7 @@ #define ARM_INST_TST_I 0x03100000 #define ARM_INST_UDIV 0x0730f010 +#define ARM_INST_SDIV 0x0710f010 #define ARM_INST_UMULL 0x00800090 @@ -265,6 +268,7 @@ #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) #define ARM_UDIV(rd, rn, rm) (ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8) +#define ARM_SDIV(rd, rn, rm) (ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8) #define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \ | (rd_lo) << 12 | (rm) << 8 | rn) |