From 471f3d4ee4a6db5c8621bb1c186a1d20a0639630 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:42 +0000 Subject: arm32, bpf: add support for 32-bit offset jmp instruction The cpuv4 adds unconditional jump with 32-bit offset where the immediate field of the instruction is to be used to calculate the jump offset. BPF_JA | BPF_K | BPF_JMP32 => gotol +imm => PC += imm. Signed-off-by: Puranjay Mohan Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230907230550.1417590-2-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6a1c9fca5260..c03600fe86f6 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1761,10 +1761,15 @@ go_jmp: break; /* JMP OFF */ case BPF_JMP | BPF_JA: + case BPF_JMP32 | BPF_JA: { - if (off == 0) + if (BPF_CLASS(code) == BPF_JMP32 && imm != 0) + jmp_offset = bpf2a32_offset(i + imm, i, ctx); + else if (BPF_CLASS(code) == BPF_JMP && off != 0) + jmp_offset = bpf2a32_offset(i + off, i, ctx); + else break; - jmp_offset = bpf2a32_offset(i+off, i, ctx); + check_imm24(jmp_offset); emit(ARM_B(jmp_offset), ctx); break; -- cgit v1.2.3-58-ga151 From f9e6981b1f1ce5e954e4e9b82e6d3e564d4a3254 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:43 +0000 Subject: arm32, bpf: add support for sign-extension load instruction The cpuv4 added the support of an instruction that is similar to load but also sign-extends the result after the load. BPF_MEMSX | | BPF_LDX means dst = *(signed size *) (src + offset) here can be one of BPF_B, BPF_H, BPF_W. ARM32 has instructions to load a byte or a half word with sign extension into a 32bit register. As the JIT uses two 32 bit registers to simulate a 64-bit BPF register, an extra instruction is emitted to sign-extent the result up to the second register. Signed-off-by: Puranjay Mohan Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230907230550.1417590-3-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++- arch/arm/net/bpf_jit_32.h | 2 ++ 2 files changed, 75 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c03600fe86f6..1b3bd05878ab 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -333,6 +333,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) +#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off) +#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off) + #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) @@ -1026,6 +1029,24 @@ static bool is_ldst_imm(s16 off, const u8 size) return -off_max <= off && off <= off_max; } +static bool is_ldst_imm8(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + off_max = 0xff; + break; + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], s16 off, struct jit_ctx *ctx, const u8 sz){ @@ -1105,6 +1126,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, arm_bpf_put_reg64(dst, rd, ctx); } +/* dst = *(signed size*)(src + off) */ +static inline void emit_ldsx_r(const s8 dst[], const s8 src, + s16 off, struct jit_ctx *ctx, const u8 sz){ + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + s8 rm = src; + int add_off; + + if (!is_ldst_imm8(off, sz)) { + /* + * offset does not fit in the load/store immediate, + * construct an ADD instruction to apply the offset. + */ + add_off = imm8m(off); + if (add_off > 0) { + emit(ARM_ADD_I(tmp[0], src, add_off), ctx); + rm = tmp[0]; + } else { + emit_a32_mov_i(tmp[0], off, ctx); + emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); + rm = tmp[0]; + } + off = 0; + } + + switch (sz) { + case BPF_B: + /* Load a Byte with sign extension*/ + emit(ARM_LDRSB_I(rd[1], rm, off), ctx); + break; + case BPF_H: + /* Load a HalfWord with sign extension*/ + emit(ARM_LDRSH_I(rd[1], rm, off), ctx); + break; + case BPF_W: + /* Load a Word*/ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + break; + } + /* Carry the sign extension to upper 32 bits */ + emit(ARM_ASR_I(rd[0], rd[1], 31), ctx); + arm_bpf_put_reg64(dst, rd, ctx); +} + /* Arithmatic Operation */ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, const u8 rn, struct jit_ctx *ctx, u8 op, @@ -1603,8 +1668,15 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: + /* LDSX: dst = *(signed size *)(src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); + if (BPF_MODE(insn->code) == BPF_MEMSX) + emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code)); + else + emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* speculation barrier */ case BPF_ST | BPF_NOSPEC: diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index e0b593a1498d..79c7373fadce 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -79,9 +79,11 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 +#define ARM_INST_LDRSB_I 0x015000d0 #define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 +#define ARM_INST_LDRSH_I 0x015000f0 #define ARM_INST_LDR_I 0x05100000 #define ARM_INST_LDR_R 0x07900000 -- cgit v1.2.3-58-ga151 From fc832653fa0dba174bf8fee9db85f3f9d1450b8a Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:44 +0000 Subject: arm32, bpf: add support for sign-extension mov instruction The cpuv4 added a new BPF_MOVSX instruction that sign extends the src before moving it to the destination. BPF_ALU | BPF_MOVSX sign extends 8-bit and 16-bit operands into 32-bit operands, and zeroes the remaining upper 32 bits. BPF_ALU64 | BPF_MOVSX sign extends 8-bit, 16-bit, and 32-bit operands into 64-bit operands. The offset field of the instruction is used to tell the number of bit to use for sign-extension. BPF_MOV and BPF_MOVSX have the same code but the former sets offset to 0 and the later one sets the offset to 8, 16 or 32 The behaviour of this instruction is dst = (s8,s16,s32)src On ARM32 the implementation uses LSH and ARSH to extend the 8/16 bits to a 32-bit register and then it is sign extended to the upper 32-bit register using ARSH. For 32-bit we just move it to the destination register and use ARSH to extend it to the upper 32-bit register. Signed-off-by: Puranjay Mohan Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230907230550.1417590-4-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 1b3bd05878ab..29a1ccf761fd 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -747,12 +747,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], } /* dst = src (4 bytes)*/ -static inline void emit_a32_mov_r(const s8 dst, const s8 src, +static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; s8 rt; rt = arm_bpf_get_reg32(src, tmp[0], ctx); + if (off && off != 32) { + emit(ARM_LSL_I(rt, rt, 32 - off), ctx); + emit(ARM_ASR_I(rt, rt, 32 - off), ctx); + } arm_bpf_put_reg32(dst, rt, ctx); } @@ -761,15 +765,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { - emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_lo, src_lo, 0, ctx); if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ - emit_a32_mov_r(dst_lo, src_lo, ctx); - emit_a32_mov_r(dst_hi, src_hi, ctx); + emit_a32_mov_r(dst_lo, src_lo, 0, ctx); + emit_a32_mov_r(dst_hi, src_hi, 0, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; @@ -785,6 +789,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], } } +/* dst = (signed)src */ +static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rt; + + rt = arm_bpf_get_reg64(dst, tmp, ctx); + + emit_a32_mov_r(dst_lo, src_lo, off, ctx); + if (!is64) { + if (!ctx->prog->aux->verifier_zext) + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); + } else { + emit(ARM_ASR_I(rt[0], rt[1], 31), ctx); + } +} + /* Shift operations */ static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { @@ -1450,7 +1472,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_a32_mov_i(dst_hi, 0, ctx); break; } - emit_a32_mov_r64(is64, dst, src, ctx); + if (insn->off) + emit_a32_movsx_r64(is64, insn->off, dst, src, ctx); + else + emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: /* Sign-extend immediate value to destination reg */ -- cgit v1.2.3-58-ga151 From 1cfb7eaebeac9270fcb527f47bbdea34ca3cd5b2 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:45 +0000 Subject: arm32, bpf: add support for unconditional bswap instruction The cpuv4 added a new unconditional bswap instruction with following behaviour: BPF_ALU64 | BPF_TO_LE | BPF_END with imm = 16/32/64 means: dst = bswap16(dst) dst = bswap32(dst) dst = bswap64(dst) As we already support converting to big-endian from little-endian we can use the same for unconditional bswap. just treat the unconditional scenario the same as big-endian conversion. Signed-off-by: Puranjay Mohan Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230907230550.1417590-5-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 29a1ccf761fd..a08eba850ddf 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1635,10 +1635,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* dst = htole(dst) */ /* dst = htobe(dst) */ - case BPF_ALU | BPF_END | BPF_FROM_LE: - case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ + case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */ + /* dst = bswap(dst) */ + case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ rd = arm_bpf_get_reg64(dst, tmp, ctx); - if (BPF_SRC(code) == BPF_FROM_LE) + if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64) goto emit_bswap_uxt; switch (imm) { case 16: -- cgit v1.2.3-58-ga151 From 5097faa559a6097de436bdff4027d036b5493d1a Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:46 +0000 Subject: arm32, bpf: add support for 32-bit signed division The cpuv4 added a new BPF_SDIV instruction that does signed division. The encoding is similar to BPF_DIV but BPF_SDIV sets offset=1. ARM32 already supports 32-bit BPF_DIV which can be easily extended to support BPF_SDIV as ARM32 has the SDIV instruction. When the CPU is not ARM-v7, we implement that SDIV/SMOD with the function call similar to the implementation of DIV/MOD. Signed-off-by: Puranjay Mohan Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230907230550.1417590-6-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 38 ++++++++++++++++++++++++++++++-------- arch/arm/net/bpf_jit_32.h | 2 ++ 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a08eba850ddf..6939546f4ddf 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -228,6 +228,16 @@ static u32 jit_mod32(u32 dividend, u32 divisor) return dividend % divisor; } +static s32 jit_sdiv32(s32 dividend, s32 divisor) +{ + return dividend / divisor; +} + +static s32 jit_smod32(s32 dividend, s32 divisor) +{ + return dividend % divisor; +} + static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { inst |= (cond << 28); @@ -477,17 +487,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) return to - from - 2; } -static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign) { const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; + u32 dst; #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { - if (op == BPF_DIV) - emit(ARM_UDIV(rd, rm, rn), ctx); - else { - emit(ARM_UDIV(ARM_IP, rm, rn), ctx); + if (op == BPF_DIV) { + emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx); + } else { + emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx); emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); } return; @@ -515,8 +526,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); /* Call appropriate function */ - emit_mov_i(ARM_IP, op == BPF_DIV ? - (u32)jit_udiv32 : (u32)jit_mod32, ctx); + if (sign) { + if (op == BPF_DIV) + dst = (u32)jit_sdiv32; + else + dst = (u32)jit_smod32; + } else { + if (op == BPF_DIV) + dst = (u32)jit_udiv32; + else + dst = (u32)jit_mod32; + } + + emit_mov_i(ARM_IP, dst, ctx); emit_blx_r(ARM_IP, ctx); /* Restore caller-saved registers from stack */ @@ -1551,7 +1573,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rt = src_lo; break; } - emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); + emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off); arm_bpf_put_reg32(dst_lo, rd_lo, ctx); if (!ctx->prog->aux->verifier_zext) emit_a32_mov_i(dst_hi, 0, ctx); diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index 79c7373fadce..438f0e1f91a0 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -139,6 +139,7 @@ #define ARM_INST_TST_I 0x03100000 #define ARM_INST_UDIV 0x0730f010 +#define ARM_INST_SDIV 0x0710f010 #define ARM_INST_UMULL 0x00800090 @@ -267,6 +268,7 @@ #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) #define ARM_UDIV(rd, rn, rm) (ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8) +#define ARM_SDIV(rd, rn, rm) (ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8) #define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \ | (rd_lo) << 12 | (rm) << 8 | rn) -- cgit v1.2.3-58-ga151 From 71086041c2ba04aa436a4b2283c708345e72a0bb Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:47 +0000 Subject: arm32, bpf: add support for 64 bit division instruction ARM32 doesn't have instructions to do 64-bit/64-bit divisions. So, to implement the following instructions: BPF_ALU64 | BPF_DIV BPF_ALU64 | BPF_MOD BPF_ALU64 | BPF_SDIV BPF_ALU64 | BPF_SMOD We implement the above instructions by doing function calls to div64_u64() and div64_u64_rem() for unsigned division/mod and calls to div64_s64() for signed division/mod. Signed-off-by: Puranjay Mohan Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230907230550.1417590-7-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 116 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6939546f4ddf..1d672457d02f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -2,6 +2,7 @@ /* * Just-In-Time compiler for eBPF filters on 32bit ARM * + * Copyright (c) 2023 Puranjay Mohan * Copyright (c) 2017 Shubham Bansal * Copyright (c) 2011 Mircea Gherzan */ @@ -15,6 +16,7 @@ #include #include #include +#include #include #include @@ -238,6 +240,34 @@ static s32 jit_smod32(s32 dividend, s32 divisor) return dividend % divisor; } +/* Wrappers for 64-bit div/mod */ +static u64 jit_udiv64(u64 dividend, u64 divisor) +{ + return div64_u64(dividend, divisor); +} + +static u64 jit_mod64(u64 dividend, u64 divisor) +{ + u64 rem; + + div64_u64_rem(dividend, divisor, &rem); + return rem; +} + +static s64 jit_sdiv64(s64 dividend, s64 divisor) +{ + return div64_s64(dividend, divisor); +} + +static s64 jit_smod64(s64 dividend, s64 divisor) +{ + u64 q; + + q = div64_s64(dividend, divisor); + + return dividend - q * divisor; +} + static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { inst |= (cond << 28); @@ -555,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } +static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx, + u8 op, u8 sign) +{ + u32 dst; + + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK), ctx); + + /* + * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in + * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack, + * we can recover them later after returning from the function call. + */ + if (rm[1] != ARM_R0 || rn[1] != ARM_R2) { + /* + * Move Rm to {R1, R0} if it is not already there. + */ + if (rm[1] != ARM_R0) { + if (rn[1] == ARM_R0) + emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx); + emit(ARM_MOV_R(ARM_R1, rm[0]), ctx); + emit(ARM_MOV_R(ARM_R0, rm[1]), ctx); + if (rn[1] == ARM_R0) { + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); + goto cont; + } + } + /* + * Move Rn to {R3, R2} if it is not already there. + */ + if (rn[1] != ARM_R2) { + emit(ARM_MOV_R(ARM_R3, rn[0]), ctx); + emit(ARM_MOV_R(ARM_R2, rn[1]), ctx); + } + } + +cont: + + /* Call appropriate function */ + if (sign) { + if (op == BPF_DIV) + dst = (u32)jit_sdiv64; + else + dst = (u32)jit_smod64; + } else { + if (op == BPF_DIV) + dst = (u32)jit_udiv64; + else + dst = (u32)jit_mod64; + } + + emit_mov_i(ARM_IP, dst, ctx); + emit_blx_r(ARM_IP, ctx); + + /* Save return value */ + if (rd[1] != ARM_R0) { + emit(ARM_MOV_R(rd[0], ARM_R1), ctx); + emit(ARM_MOV_R(rd[1], ARM_R0), ctx); + } + + /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */ + if (rd[1] != ARM_R0 && rd[1] != ARM_R2) { + emit(ARM_POP(CALLER_MASK), ctx); + } else if (rd[1] != ARM_R0) { + emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx); + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); + } else { + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); + } +} + /* Is the translated BPF register on stack? */ static bool is_stacked(s8 reg) { @@ -1582,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_X: - goto notyet; + rd = arm_bpf_get_reg64(dst, tmp2, ctx); + switch (BPF_SRC(code)) { + case BPF_X: + rs = arm_bpf_get_reg64(src, tmp, ctx); + break; + case BPF_K: + rs = tmp; + emit_a32_mov_se_i64(is64, rs, imm, ctx); + break; + } + emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off); + arm_bpf_put_reg64(dst, rd, ctx); + break; /* dst = dst << imm */ /* dst = dst >> imm */ /* dst = dst >> imm (signed) */ -- cgit v1.2.3-58-ga151