summaryrefslogtreecommitdiff
path: root/arch/arm
diff options
context:
space:
mode:
authorHuacai Chen <chenhuacai@loongson.cn>2023-11-01 10:55:00 +0800
committerHuacai Chen <chenhuacai@loongson.cn>2023-11-01 10:55:00 +0800
commita6bdc082ad1c91d389a6ba0c7a1945818f732114 (patch)
treefa630701d5d2a8bc1ab8c4abf759663bbb81aeeb /arch/arm
parentffc253263a1375a65fa6c9f62a893e9767fbebfa (diff)
parent99c9991f4e5d77328187187d0c921a3b62bfa998 (diff)
Merge 'bpf-next 2023-10-16' into loongarch-next
LoongArch architecture changes for 6.7 (BPF CPU v4 support) depend on the bpf changes to fix conflictions in selftests and work, so merge them to create a base.
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/net/bpf_jit_32.c280
-rw-r--r--arch/arm/net/bpf_jit_32.h4
2 files changed, 264 insertions, 20 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6a1c9fca5260..1d672457d02f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2,6 +2,7 @@
/*
* Just-In-Time compiler for eBPF filters on 32bit ARM
*
+ * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
* Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*/
@@ -15,6 +16,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/math64.h>
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
@@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
return dividend % divisor;
}
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+ return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+ return dividend % divisor;
+}
+
+/* Wrappers for 64-bit div/mod */
+static u64 jit_udiv64(u64 dividend, u64 divisor)
+{
+ return div64_u64(dividend, divisor);
+}
+
+static u64 jit_mod64(u64 dividend, u64 divisor)
+{
+ u64 rem;
+
+ div64_u64_rem(dividend, divisor, &rem);
+ return rem;
+}
+
+static s64 jit_sdiv64(s64 dividend, s64 divisor)
+{
+ return div64_s64(dividend, divisor);
+}
+
+static s64 jit_smod64(s64 dividend, s64 divisor)
+{
+ u64 q;
+
+ q = div64_s64(dividend, divisor);
+
+ return dividend - q * divisor;
+}
+
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
inst |= (cond << 28);
@@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
+#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
+#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)
+
#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
return to - from - 2;
}
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
const s8 *tmp = bpf2a32[TMP_REG_1];
+ u32 dst;
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
- if (op == BPF_DIV)
- emit(ARM_UDIV(rd, rm, rn), ctx);
- else {
- emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+ if (op == BPF_DIV) {
+ emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
+ } else {
+ emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
}
return;
@@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
/* Call appropriate function */
- emit_mov_i(ARM_IP, op == BPF_DIV ?
- (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ if (sign) {
+ if (op == BPF_DIV)
+ dst = (u32)jit_sdiv32;
+ else
+ dst = (u32)jit_smod32;
+ } else {
+ if (op == BPF_DIV)
+ dst = (u32)jit_udiv32;
+ else
+ dst = (u32)jit_mod32;
+ }
+
+ emit_mov_i(ARM_IP, dst, ctx);
emit_blx_r(ARM_IP, ctx);
/* Restore caller-saved registers from stack */
@@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
+static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
+ u8 op, u8 sign)
+{
+ u32 dst;
+
+ /* Push caller-saved registers on stack */
+ emit(ARM_PUSH(CALLER_MASK), ctx);
+
+ /*
+ * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
+ * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
+ * we can recover them later after returning from the function call.
+ */
+ if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
+ /*
+ * Move Rm to {R1, R0} if it is not already there.
+ */
+ if (rm[1] != ARM_R0) {
+ if (rn[1] == ARM_R0)
+ emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+ emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
+ emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
+ if (rn[1] == ARM_R0) {
+ emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+ goto cont;
+ }
+ }
+ /*
+ * Move Rn to {R3, R2} if it is not already there.
+ */
+ if (rn[1] != ARM_R2) {
+ emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
+ emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
+ }
+ }
+
+cont:
+
+ /* Call appropriate function */
+ if (sign) {
+ if (op == BPF_DIV)
+ dst = (u32)jit_sdiv64;
+ else
+ dst = (u32)jit_smod64;
+ } else {
+ if (op == BPF_DIV)
+ dst = (u32)jit_udiv64;
+ else
+ dst = (u32)jit_mod64;
+ }
+
+ emit_mov_i(ARM_IP, dst, ctx);
+ emit_blx_r(ARM_IP, ctx);
+
+ /* Save return value */
+ if (rd[1] != ARM_R0) {
+ emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
+ emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
+ }
+
+ /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
+ if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
+ emit(ARM_POP(CALLER_MASK), ctx);
+ } else if (rd[1] != ARM_R0) {
+ emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+ } else {
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+ emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+ }
+}
+
/* Is the translated BPF register on stack? */
static bool is_stacked(s8 reg)
{
@@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}
/* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src,
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
s8 rt;
rt = arm_bpf_get_reg32(src, tmp[0], ctx);
+ if (off && off != 32) {
+ emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
+ emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
+ }
arm_bpf_put_reg32(dst, rt, ctx);
}
@@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
const s8 src[],
struct jit_ctx *ctx) {
if (!is64) {
- emit_a32_mov_r(dst_lo, src_lo, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
- emit_a32_mov_r(dst_lo, src_lo, ctx);
- emit_a32_mov_r(dst_hi, src_hi, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+ emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
const u8 *tmp = bpf2a32[TMP_REG_1];
@@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
}
}
+/* dst = (signed)src */
+static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
+ struct jit_ctx *ctx) {
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rt;
+
+ rt = arm_bpf_get_reg64(dst, tmp, ctx);
+
+ emit_a32_mov_r(dst_lo, src_lo, off, ctx);
+ if (!is64) {
+ if (!ctx->prog->aux->verifier_zext)
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, ctx);
+ } else {
+ emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+ }
+}
+
/* Shift operations */
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
struct jit_ctx *ctx, const u8 op) {
@@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size)
return -off_max <= off && off <= off_max;
}
+static bool is_ldst_imm8(s16 off, const u8 size)
+{
+ s16 off_max = 0;
+
+ switch (size) {
+ case BPF_B:
+ off_max = 0xff;
+ break;
+ case BPF_W:
+ off_max = 0xfff;
+ break;
+ case BPF_H:
+ off_max = 0xff;
+ break;
+ }
+ return -off_max <= off && off <= off_max;
+}
+
/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
s16 off, struct jit_ctx *ctx, const u8 sz){
@@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
arm_bpf_put_reg64(dst, rd, ctx);
}
+/* dst = *(signed size*)(src + off) */
+static inline void emit_ldsx_r(const s8 dst[], const s8 src,
+ s16 off, struct jit_ctx *ctx, const u8 sz){
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+ s8 rm = src;
+ int add_off;
+
+ if (!is_ldst_imm8(off, sz)) {
+ /*
+ * offset does not fit in the load/store immediate,
+ * construct an ADD instruction to apply the offset.
+ */
+ add_off = imm8m(off);
+ if (add_off > 0) {
+ emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
+ rm = tmp[0];
+ } else {
+ emit_a32_mov_i(tmp[0], off, ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+ rm = tmp[0];
+ }
+ off = 0;
+ }
+
+ switch (sz) {
+ case BPF_B:
+ /* Load a Byte with sign extension*/
+ emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
+ break;
+ case BPF_H:
+ /* Load a HalfWord with sign extension*/
+ emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
+ break;
+ case BPF_W:
+ /* Load a Word*/
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ break;
+ }
+ /* Carry the sign extension to upper 32 bits */
+ emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
+ arm_bpf_put_reg64(dst, rd, ctx);
+}
+
/* Arithmatic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1385,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
}
- emit_a32_mov_r64(is64, dst, src, ctx);
+ if (insn->off)
+ emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
+ else
+ emit_a32_mov_r64(is64, dst, src, ctx);
break;
case BPF_K:
/* Sign-extend immediate value to destination reg */
@@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rt = src_lo;
break;
}
- emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+ emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_X:
- goto notyet;
+ rd = arm_bpf_get_reg64(dst, tmp2, ctx);
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ rs = arm_bpf_get_reg64(src, tmp, ctx);
+ break;
+ case BPF_K:
+ rs = tmp;
+ emit_a32_mov_se_i64(is64, rs, imm, ctx);
+ break;
+ }
+ emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
+ arm_bpf_put_reg64(dst, rd, ctx);
+ break;
/* dst = dst << imm */
/* dst = dst >> imm */
/* dst = dst >> imm (signed) */
@@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
/* dst = htole(dst) */
/* dst = htobe(dst) */
- case BPF_ALU | BPF_END | BPF_FROM_LE:
- case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
+ case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
+ /* dst = bswap(dst) */
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
rd = arm_bpf_get_reg64(dst, tmp, ctx);
- if (BPF_SRC(code) == BPF_FROM_LE)
+ if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
goto emit_bswap_uxt;
switch (imm) {
case 16:
@@ -1603,8 +1831,15 @@ exit:
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_DW:
+ /* LDSX: dst = *(signed size *)(src + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
- emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ if (BPF_MODE(insn->code) == BPF_MEMSX)
+ emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ else
+ emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
break;
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
@@ -1761,10 +1996,15 @@ go_jmp:
break;
/* JMP OFF */
case BPF_JMP | BPF_JA:
+ case BPF_JMP32 | BPF_JA:
{
- if (off == 0)
+ if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
+ jmp_offset = bpf2a32_offset(i + imm, i, ctx);
+ else if (BPF_CLASS(code) == BPF_JMP && off != 0)
+ jmp_offset = bpf2a32_offset(i + off, i, ctx);
+ else
break;
- jmp_offset = bpf2a32_offset(i+off, i, ctx);
+
check_imm24(jmp_offset);
emit(ARM_B(jmp_offset), ctx);
break;
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index e0b593a1498d..438f0e1f91a0 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -79,9 +79,11 @@
#define ARM_INST_LDST__IMM12 0x00000fff
#define ARM_INST_LDRB_I 0x05500000
#define ARM_INST_LDRB_R 0x07d00000
+#define ARM_INST_LDRSB_I 0x015000d0
#define ARM_INST_LDRD_I 0x014000d0
#define ARM_INST_LDRH_I 0x015000b0
#define ARM_INST_LDRH_R 0x019000b0
+#define ARM_INST_LDRSH_I 0x015000f0
#define ARM_INST_LDR_I 0x05100000
#define ARM_INST_LDR_R 0x07900000
@@ -137,6 +139,7 @@
#define ARM_INST_TST_I 0x03100000
#define ARM_INST_UDIV 0x0730f010
+#define ARM_INST_SDIV 0x0710f010
#define ARM_INST_UMULL 0x00800090
@@ -265,6 +268,7 @@
#define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm)
#define ARM_UDIV(rd, rn, rm) (ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
+#define ARM_SDIV(rd, rn, rm) (ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)
#define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \
| (rd_lo) << 12 | (rm) << 8 | rn)