diff options
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 248 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.c | 38 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.h | 44 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/offload.c | 65 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_app.h | 60 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_asm.c | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_asm.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 27 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 111 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_repr.h | 1 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 3 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 20 | ||||
-rw-r--r-- | net/core/dev.c | 9 | ||||
-rw-r--r-- | samples/bpf/Makefile | 1 | ||||
-rwxr-xr-x | samples/bpf/xdp2skb_meta.sh | 220 | ||||
-rw-r--r-- | samples/bpf/xdp2skb_meta_kern.c | 103 |
18 files changed, 760 insertions, 263 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 0de59f04da84..47c5224f8d6f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -85,7 +85,7 @@ static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) { - return nfp_prog->start_off + nfp_prog->prog_len; + return nfp_prog->prog_len; } static bool @@ -100,12 +100,6 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); } -static unsigned int -nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) -{ - return offset - nfp_prog->start_off; -} - /* --- Emitters --- */ static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, @@ -195,22 +189,28 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, nfp_prog_push(nfp_prog, insn); } -static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +static void +emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer, + enum nfp_relo_type relo) { - if (defer > 2) { + if (mask == BR_UNC && defer > 2) { pr_err("BUG: branch defer out of bounds %d\n", defer); nfp_prog->error = -EFAULT; return; } - __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); + + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); } static void emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) { - __emit_br(nfp_prog, mask, - mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, - BR_CSS_NONE, addr, defer); + emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); } static void @@ -515,16 +515,6 @@ static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) emit_nop(nfp_prog); } -static void -wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, - enum br_special special) -{ - emit_br(nfp_prog, mask, 0, 0); - - nfp_prog->prog[nfp_prog->prog_len - 1] |= - FIELD_PREP(OP_BR_SPECIAL, special); -} - static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) { emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); @@ -749,7 +739,7 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); @@ -762,7 +752,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) /* Check packet length */ tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); @@ -1269,7 +1259,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); /* Skip over the -EINVAL ret code (defer 2) */ - emit_br_def(nfp_prog, end, 2); + emit_br(nfp_prog, BR_UNC, end, 2); emit_alu(nfp_prog, plen_reg(nfp_prog), plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); @@ -1924,6 +1914,26 @@ static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); } +static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true); +} + +static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false); +} + +static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false); +} + +static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true); +} + static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -2013,6 +2023,26 @@ static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); } +static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true); +} + +static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false); +} + +static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false); +} + +static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true); +} + static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); @@ -2036,7 +2066,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); return 0; } @@ -2097,6 +2127,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, + [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm, + [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm, + [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm, + [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm, [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, @@ -2104,24 +2138,16 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, + [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg, + [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg, + [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg, + [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg, [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, [BPF_JMP | BPF_EXIT] = goto_out, }; -/* --- Misc code --- */ -static void br_set_offset(u64 *instr, u16 offset) -{ - u16 addr_lo, addr_hi; - - addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); - addr_hi = offset != addr_lo; - *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); - *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); - *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); -} - /* --- Assembler logic --- */ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { @@ -2137,11 +2163,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) continue; if (list_is_last(&meta->l, &nfp_prog->insns)) - idx = nfp_prog->last_bpf_off; + br_idx = nfp_prog->last_bpf_off; else - idx = list_next_entry(meta, l)->off - 1; - - br_idx = nfp_prog_offset_to_index(nfp_prog, idx); + br_idx = list_next_entry(meta, l)->off - 1; if (!nfp_is_br(nfp_prog->prog[br_idx])) { pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", @@ -2149,7 +2173,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } /* Leave special branches for later */ - if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) continue; if (!meta->jmp_dst) { @@ -2164,38 +2189,13 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } - for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); - idx <= br_idx; idx++) { + for (idx = meta->off; idx <= br_idx; idx++) { if (!nfp_is_br(nfp_prog->prog[idx])) continue; br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); } } - /* Fixup 'goto out's separately, they can be scattered around */ - for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { - enum br_special special; - - if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) - continue; - - special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); - switch (special) { - case OP_BR_NORMAL: - break; - case OP_BR_GO_OUT: - br_set_offset(&nfp_prog->prog[br_idx], - nfp_prog->tgt_out); - break; - case OP_BR_GO_ABORT: - br_set_offset(&nfp_prog->prog[br_idx], - nfp_prog->tgt_abort); - break; - } - - nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; - } - return 0; } @@ -2223,7 +2223,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); @@ -2250,7 +2250,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); emit_shf(nfp_prog, reg_b(2), reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); @@ -2269,7 +2269,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); @@ -2290,7 +2290,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); @@ -2706,25 +2706,38 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) return 0; } -static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) +static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) { + __le64 *ustore = (__force __le64 *)prog; int i; - for (i = 0; i < nfp_prog->prog_len; i++) { + for (i = 0; i < len; i++) { int err; - err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); + err = nfp_ustore_check_valid_no_ecc(prog[i]); if (err) return err; - nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); - - ustore[i] = cpu_to_le64(nfp_prog->prog[i]); + ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); } return 0; } +static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) +{ + void *prog; + + prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); + if (!prog) + return; + + nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); + memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); + kvfree(nfp_prog->prog); + nfp_prog->prog = prog; +} + int nfp_bpf_jit(struct nfp_prog *nfp_prog) { int ret; @@ -2740,5 +2753,78 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) return -EINVAL; } - return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); + nfp_bpf_prog_trim(nfp_prog); + + return ret; +} + +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) +{ + struct nfp_insn_meta *meta; + + /* Another pass to record jump information. */ + list_for_each_entry(meta, &nfp_prog->insns, l) { + u64 code = meta->insn.code; + + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && + BPF_OP(code) != BPF_CALL) { + struct nfp_insn_meta *dst_meta; + unsigned short dst_indx; + + dst_indx = meta->n + 1 + meta->insn.off; + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, + cnt); + + meta->jmp_dst = dst_meta; + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + } + } +} + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) +{ + unsigned int i; + u64 *prog; + int err; + + prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), + GFP_KERNEL); + if (!prog) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nfp_prog->prog_len; i++) { + enum nfp_relo_type special; + + special = FIELD_GET(OP_RELO_TYPE, prog[i]); + switch (special) { + case RELO_NONE: + continue; + case RELO_BR_REL: + br_add_offset(&prog[i], bv->start_off); + break; + case RELO_BR_GO_OUT: + br_set_offset(&prog[i], + nfp_prog->tgt_out + bv->start_off); + break; + case RELO_BR_GO_ABORT: + br_set_offset(&prog[i], + nfp_prog->tgt_abort + bv->start_off); + break; + case RELO_BR_NEXT_PKT: + br_set_offset(&prog[i], bv->tgt_done); + break; + } + + prog[i] &= ~OP_RELO_TYPE; + } + + err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); + if (err) + goto err_free_prog; + + return prog; + +err_free_prog: + kfree(prog); + return ERR_PTR(err); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 4b63167906ca..e8cfe300c8c4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -87,16 +87,21 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) static int nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) { + struct nfp_bpf_vnic *bv; int err; - nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL); - if (!nn->app_priv) + bv = kzalloc(sizeof(*bv), GFP_KERNEL); + if (!bv) return -ENOMEM; + nn->app_priv = bv; err = nfp_app_nic_vnic_alloc(app, nn, id); if (err) goto err_free_priv; + bv->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + bv->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + return 0; err_free_priv: kfree(nn->app_priv); @@ -191,7 +196,27 @@ static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn) { - return nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + struct nfp_bpf_vnic *bv = nn->app_priv; + + return !!bv->tc_prog; +} + +static int +nfp_bpf_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int max_mtu; + + if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return 0; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (new_mtu > max_mtu) { + nn_info(nn, "BPF offload active, MTU over %u not supported\n", + max_mtu); + return -EBUSY; + } + return 0; } static int @@ -311,6 +336,8 @@ const struct nfp_app_type app_bpf = { .init = nfp_bpf_init, .clean = nfp_bpf_clean, + .change_mtu = nfp_bpf_change_mtu, + .extra_cap = nfp_bpf_extra_cap, .vnic_alloc = nfp_bpf_vnic_alloc, @@ -318,9 +345,6 @@ const struct nfp_app_type app_bpf = { .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, + .bpf = nfp_ndo_bpf, .xdp_offload = nfp_bpf_xdp_offload, - - .bpf_verifier_prep = nfp_bpf_verifier_prep, - .bpf_translate = nfp_bpf_translate, - .bpf_destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 89a9b6393882..66381afee2a9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -42,17 +42,28 @@ #include "../nfp_asm.h" -/* For branch fixup logic use up-most byte of branch instruction as scratch +/* For relocation logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! */ -#define OP_BR_SPECIAL 0xff00000000000000ULL - -enum br_special { - OP_BR_NORMAL = 0, - OP_BR_GO_OUT, - OP_BR_GO_ABORT, +#define OP_RELO_TYPE 0xff00000000000000ULL + +enum nfp_relo_type { + RELO_NONE = 0, + /* standard internal jumps */ + RELO_BR_REL, + /* internal jumps to parts of the outro */ + RELO_BR_GO_OUT, + RELO_BR_GO_ABORT, + /* external jumps to fixed addresses */ + RELO_BR_NEXT_PKT, }; +/* To make absolute relocated branches (branches other than RELO_BR_REL) + * distinguishable in user space dumps from normal jumps, add a large offset + * to them. + */ +#define BR_OFF_RELO 15000 + enum static_regs { STATIC_REG_IMM = 21, /* Bank AB */ STATIC_REG_STACK = 22, /* Bank A */ @@ -191,11 +202,9 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) * @__prog_alloc_len: alloc size of @prog array * @verifier_meta: temporary storage for verifier's insn meta * @type: BPF program type - * @start_off: address of the first instruction in the memory * @last_bpf_off: address of the last instruction translated from BPF * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) - * @tgt_done: jump target to get the next packet * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong * @stack_depth: max stack depth from the verifier @@ -213,11 +222,9 @@ struct nfp_prog { enum bpf_prog_type type; - unsigned int start_off; unsigned int last_bpf_off; unsigned int tgt_out; unsigned int tgt_abort; - unsigned int tgt_done; unsigned int n_translated; int error; @@ -231,11 +238,16 @@ struct nfp_prog { /** * struct nfp_bpf_vnic - per-vNIC BPF priv structure * @tc_prog: currently loaded cls_bpf program + * @start_off: address of the first instruction in the memory + * @tgt_done: jump target to get the next packet */ struct nfp_bpf_vnic { struct bpf_prog *tc_prog; + unsigned int start_off; + unsigned int tgt_done; }; +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); int nfp_bpf_jit(struct nfp_prog *prog); extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; @@ -244,16 +256,14 @@ struct netdev_bpf; struct nfp_app; struct nfp_net; +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, bool old_prog); -int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf); -int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); -int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns); + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index fa2905e67b07..320b2250d29a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -42,6 +42,7 @@ #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/list.h> +#include <linux/mm.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> @@ -70,23 +71,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, list_add_tail(&meta->l, &nfp_prog->insns); } - /* Another pass to record jump information. */ - list_for_each_entry(meta, &nfp_prog->insns, l) { - u64 code = meta->insn.code; - - if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && - BPF_OP(code) != BPF_CALL) { - struct nfp_insn_meta *dst_meta; - unsigned short dst_indx; - - dst_indx = meta->n + 1 + meta->insn.off; - dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, - cnt); - - meta->jmp_dst = dst_meta; - dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; - } - } + nfp_bpf_jit_prepare(nfp_prog, cnt); return 0; } @@ -102,8 +87,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) +static int +nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->verifier.prog; struct nfp_prog *nfp_prog; @@ -133,8 +119,7 @@ err_free: return ret; } -int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) +static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int stack_size; @@ -146,37 +131,48 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, prog->aux->stack_depth, stack_size); return -EOPNOTSUPP; } - - nfp_prog->stack_depth = prog->aux->stack_depth; - nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); - nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4); max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); - nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + nfp_prog->prog = kvmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); if (!nfp_prog->prog) return -ENOMEM; return nfp_bpf_jit(nfp_prog); } -int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) +static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - kfree(nfp_prog->prog); + kvfree(nfp_prog->prog); nfp_prog_free(nfp_prog); return 0; } +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_bpf_verifier_prep(app, nn, bpf); + case BPF_OFFLOAD_TRANSLATE: + return nfp_bpf_translate(nn, bpf->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_bpf_destroy(nn, bpf->offload.prog); + default: + return -EINVAL; + } +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int max_mtu; dma_addr_t dma_addr; + void *img; int err; max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; @@ -185,11 +181,17 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) return -EOPNOTSUPP; } - dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv); + if (IS_ERR(img)) + return PTR_ERR(img); + + dma_addr = dma_map_single(nn->dp.dev, img, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); - if (dma_mapping_error(nn->dp.dev, dma_addr)) + if (dma_mapping_error(nn->dp.dev, dma_addr)) { + kfree(img); return -ENOMEM; + } nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); @@ -201,6 +203,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); + kfree(img); return err; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index d8870c2f11f3..7890d95d4018 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -31,8 +31,6 @@ * SOFTWARE. */ -#define pr_fmt(fmt) "NFP net bpf: " fmt - #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/kernel.h> @@ -41,6 +39,9 @@ #include "fw.h" #include "main.h" +#define pr_vlog(env, fmt, ...) \ + bpf_verifier_log_write(env, "[nfp] " fmt, ##__VA_ARGS__) + struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) @@ -116,18 +117,18 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, switch (func_id) { case BPF_FUNC_xdp_adjust_head: if (!bpf->adjust_head.off_max) { - pr_warn("adjust_head not supported by FW\n"); + pr_vlog(env, "adjust_head not supported by FW\n"); return -EOPNOTSUPP; } if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) { - pr_warn("adjust_head: FW requires shifting metadata, not supported by the driver\n"); + pr_vlog(env, "adjust_head: FW requires shifting metadata, not supported by the driver\n"); return -EOPNOTSUPP; } nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); break; default: - pr_warn("unsupported function id: %d\n", func_id); + pr_vlog(env, "unsupported function id: %d\n", func_id); return -EOPNOTSUPP; } @@ -150,7 +151,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off); - pr_info("unsupported exit state: %d, var_off: %s\n", + pr_vlog(env, "unsupported exit state: %d, var_off: %s\n", reg0->type, tn_buf); return -EINVAL; } @@ -160,7 +161,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, imm <= TC_ACT_REDIRECT && imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && imm != TC_ACT_QUEUED) { - pr_info("unsupported exit state: %d, imm: %llx\n", + pr_vlog(env, "unsupported exit state: %d, imm: %llx\n", reg0->type, imm); return -EINVAL; } @@ -171,12 +172,13 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, static int nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - const struct bpf_reg_state *reg) + const struct bpf_reg_state *reg, + struct bpf_verifier_env *env) { s32 old_off, new_off; if (!tnum_is_const(reg->var_off)) { - pr_info("variable ptr stack access\n"); + pr_vlog(env, "variable ptr stack access\n"); return -EINVAL; } @@ -194,7 +196,7 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, if (old_off % 4 == new_off % 4) return 0; - pr_info("stack access changed location was:%d is:%d\n", + pr_vlog(env, "stack access changed location was:%d is:%d\n", old_off, new_off); return -EINVAL; } @@ -209,18 +211,18 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (reg->type != PTR_TO_CTX && reg->type != PTR_TO_STACK && reg->type != PTR_TO_PACKET) { - pr_info("unsupported ptr type: %d\n", reg->type); + pr_vlog(env, "unsupported ptr type: %d\n", reg->type); return -EINVAL; } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(nfp_prog, meta, reg); + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg, env); if (err) return err; } if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { - pr_info("ptr type changed for instruction %d -> %d\n", + pr_vlog(env, "ptr type changed for instruction %d -> %d\n", meta->ptr.type, reg->type); return -EINVAL; } @@ -241,7 +243,7 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { - pr_err("program uses extended registers - jit hardening?\n"); + pr_vlog(env, "program uses extended registers - jit hardening?\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 3af1943a8521..32ff46a00f70 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -82,15 +82,15 @@ extern const struct nfp_app_type app_flower; * @repr_clean: representor about to be unregistered * @repr_open: representor netdev open callback * @repr_stop: representor netdev stop callback + * @change_mtu: MTU change on a netdev has been requested (veto-only, change + * is not guaranteed to be committed) * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler * @setup_tc: setup TC ndo * @tc_busy: TC HW offload busy (rules loaded) + * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program - * @bpf_verifier_prep: verifier prep for dev-specific BPF programs - * @bpf_translate: translate call for dev-specific BPF programs - * @bpf_destroy: destroy for dev-specific BPF programs * @eswitch_mode_get: get SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up @@ -120,6 +120,9 @@ struct nfp_app_type { int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr); int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr); + int (*change_mtu)(struct nfp_app *app, struct net_device *netdev, + int new_mtu); + int (*start)(struct nfp_app *app); void (*stop)(struct nfp_app *app); @@ -128,14 +131,10 @@ struct nfp_app_type { int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data); bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn); + int (*bpf)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *xdp); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); - int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf); - int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); - int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -242,6 +241,14 @@ nfp_app_repr_clean(struct nfp_app *app, struct net_device *netdev) app->type->repr_clean(app, netdev); } +static inline int +nfp_app_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + if (!app || !app->type->change_mtu) + return 0; + return app->type->change_mtu(app, netdev, new_mtu); +} + static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl) { app->ctrl = ctrl; @@ -303,6 +310,14 @@ static inline int nfp_app_setup_tc(struct nfp_app *app, return app->type->setup_tc(app, netdev, type, type_data); } +static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf) + return -EINVAL; + return app->type->bpf(app, nn, bpf); +} + static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog) { @@ -311,33 +326,6 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, return app->type->xdp_offload(app, nn, prog); } -static inline int -nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) -{ - if (!app || !app->type->bpf_verifier_prep) - return -EOPNOTSUPP; - return app->type->bpf_verifier_prep(app, nn, bpf); -} - -static inline int -nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) -{ - if (!app || !app->type->bpf_translate) - return -EOPNOTSUPP; - return app->type->bpf_translate(app, nn, prog); -} - -static inline int -nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) -{ - if (!app || !app->type->bpf_destroy) - return -EOPNOTSUPP; - return app->type->bpf_destroy(app, nn, prog); -} - static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index d3610987fb07..9ee3a3f60cc7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -50,6 +50,36 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, }; +u16 br_get_offset(u64 instr) +{ + u16 addr_lo, addr_hi; + + addr_lo = FIELD_GET(OP_BR_ADDR_LO, instr); + addr_hi = FIELD_GET(OP_BR_ADDR_HI, instr); + + return (addr_hi * ((OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)) + 1)) | + addr_lo; +} + +void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +void br_add_offset(u64 *instr, u16 offset) +{ + u16 addr; + + addr = br_get_offset(*instr); + br_set_offset(instr, addr + offset); +} + static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) { bool lm_id, lm_dec = false; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index a24daeab1a77..20e51cb60e69 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -81,6 +81,7 @@ enum br_mask { BR_BHS = 0x04, BR_BLO = 0x05, BR_BGE = 0x08, + BR_BLT = 0x09, BR_UNC = 0x18, }; @@ -93,6 +94,10 @@ enum br_ctx_signal_state { BR_CSS_NONE = 2, }; +u16 br_get_offset(u64 instr); +void br_set_offset(u64 *instr, u16 offset); +void br_add_offset(u64 *instr, u16 offset); + #define OP_BBYTE_BASE 0x0c800000000ULL #define OP_BB_A_SRC 0x000000000ffULL #define OP_BB_BYTE 0x00000000300ULL diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 05e071b3dc5b..caee147fce04 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2253,7 +2253,8 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + if (dp->netdev) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); kfree(rx_ring->rxbufs); if (rx_ring->rxds) @@ -2279,9 +2280,12 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) { int sz, err; - err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx); - if (err < 0) - return err; + if (dp->netdev) { + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, + rx_ring->idx); + if (err < 0) + return err; + } rx_ring->cnt = dp->rxd_cnt; rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; @@ -3045,6 +3049,11 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_dp *dp; + int err; + + err = nfp_app_change_mtu(nn->app, netdev, new_mtu); + if (err) + return err; dp = nfp_net_clone_dp(nn); if (!dp) @@ -3405,16 +3414,8 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0; return 0; - case BPF_OFFLOAD_VERIFIER_PREP: - return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); - case BPF_OFFLOAD_TRANSLATE: - return nfp_app_bpf_translate(nn->app, nn, - xdp->offload.prog); - case BPF_OFFLOAD_DESTROY: - return nfp_app_bpf_destroy(nn->app, nn, - xdp->offload.prog); default: - return -EINVAL; + return nfp_app_bpf(nn->app, nn, xdp); } } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 782d452e0fc2..25c36001bffa 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -91,23 +91,24 @@ #define NFP_NET_RSS_IPV6_EX_UDP 9 /** - * @NFP_NET_TXR_MAX: Maximum number of TX rings - * @NFP_NET_RXR_MAX: Maximum number of RX rings + * Ring counts + * %NFP_NET_TXR_MAX: Maximum number of TX rings + * %NFP_NET_RXR_MAX: Maximum number of RX rings */ #define NFP_NET_TXR_MAX 64 #define NFP_NET_RXR_MAX 64 /** * Read/Write config words (0x0000 - 0x002c) - * @NFP_NET_CFG_CTRL: Global control - * @NFP_NET_CFG_UPDATE: Indicate which fields are updated - * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings - * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings - * @NFP_NET_CFG_MTU: Set MTU size - * @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) - * @NFP_NET_CFG_EXN: MSI-X table entry for exceptions - * @NFP_NET_CFG_LSC: MSI-X table entry for link state changes - * @NFP_NET_CFG_MACADDR: MAC address + * %NFP_NET_CFG_CTRL: Global control + * %NFP_NET_CFG_UPDATE: Indicate which fields are updated + * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * %NFP_NET_CFG_MTU: Set MTU size + * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * %NFP_NET_CFG_MACADDR: MAC address * * TODO: * - define Error details in UPDATE @@ -176,14 +177,14 @@ /** * Read-only words (0x0030 - 0x0050): - * @NFP_NET_CFG_VERSION: Firmware version number - * @NFP_NET_CFG_STS: Status - * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) - * @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings - * @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings - * @NFP_NET_CFG_MAX_MTU: Maximum support MTU - * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) - * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * %NFP_NET_CFG_VERSION: Firmware version number + * %NFP_NET_CFG_STS: Status + * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) + * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings + * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings + * %NFP_NET_CFG_MAX_MTU: Maximum support MTU + * %NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * %NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) * * TODO: * - define more STS bits @@ -228,31 +229,31 @@ /** * RSS capabilities - * @NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as - * @NFP_NET_CFG_RSS_HFUNC) + * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as + * %NFP_NET_CFG_RSS_HFUNC) */ #define NFP_NET_CFG_RSS_CAP 0x0054 #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 /** * VXLAN/UDP encap configuration - * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports - * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ #define NFP_NET_CFG_VXLAN_PORT 0x0060 #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** * BPF section - * @NFP_NET_CFG_BPF_ABI: BPF ABI version - * @NFP_NET_CFG_BPF_CAP: BPF capabilities - * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes - * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded - * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit - * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks - * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks - * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions - * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code + * %NFP_NET_CFG_BPF_ABI: BPF ABI version + * %NFP_NET_CFG_BPF_CAP: BPF capabilities + * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * %NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * %NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * %NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * %NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * %NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ #define NFP_NET_CFG_BPF_ABI 0x0080 #define NFP_NET_BPF_ABI 2 @@ -278,9 +279,9 @@ /** * RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled - * @NFP_NET_CFG_RSS_CFG: RSS configuration word - * @NFP_NET_CFG_RSS_KEY: RSS "secret" key - * @NFP_NET_CFG_RSS_ITBL: RSS indirection table + * %NFP_NET_CFG_RSS_CFG: RSS configuration word + * %NFP_NET_CFG_RSS_KEY: RSS "secret" key + * %NFP_NET_CFG_RSS_ITBL: RSS indirection table */ #define NFP_NET_CFG_RSS_BASE 0x0100 #define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE @@ -305,13 +306,13 @@ /** * TX ring configuration (0x200 - 0x800) - * @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration - * @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) - * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) - * @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) - * @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) - * @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) - * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * %NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * %NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet */ #define NFP_NET_CFG_TXR_BASE 0x0200 #define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) @@ -325,12 +326,12 @@ /** * RX ring configuration (0x0800 - 0x0c00) - * @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration - * @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) - * @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) - * @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) - * @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) - * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * %NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) */ #define NFP_NET_CFG_RXR_BASE 0x0800 #define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) @@ -343,7 +344,7 @@ /** * Interrupt Control/Cause registers (0x0c00 - 0x0d00) * These registers are only used when MSI-X auto-masking is not - * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index * by MSI-X entry and are 1B in size. If an entry is zero, the * corresponding entry is enabled. If the FW generates an interrupt, * it writes a cause into the corresponding field. This also masks @@ -393,8 +394,8 @@ /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry - * @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) - * @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) */ #define NFP_NET_CFG_TXR_STATS_BASE 0x1000 #define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ @@ -418,10 +419,10 @@ /** * VLAN filtering using general use mailbox - * @NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox - * @NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter - * @NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter - * @NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes + * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox + * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter + * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter + * %NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes */ #define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_VAL #define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index f50aa119570a..317f87cc3cc6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -186,6 +186,13 @@ nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } +static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + return nfp_app_change_mtu(repr->app, netdev, new_mtu); +} + static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); @@ -240,6 +247,7 @@ const struct net_device_ops nfp_repr_netdev_ops = { .ndo_open = nfp_repr_open, .ndo_stop = nfp_repr_stop, .ndo_start_xmit = nfp_repr_xmit, + .ndo_change_mtu = nfp_repr_change_mtu, .ndo_get_stats64 = nfp_repr_get_stats64, .ndo_has_offload_stats = nfp_repr_has_offload_stats, .ndo_get_offload_stats = nfp_repr_get_offload_stats, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index 5d4d897bc9c6..cbc7badf40a0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -89,6 +89,7 @@ struct nfp_repr { * @NFP_REPR_TYPE_PHYS_PORT: external NIC port * @NFP_REPR_TYPE_PF: physical function * @NFP_REPR_TYPE_VF: virtual function + * @__NFP_REPR_TYPE_MAX: number of representor types */ enum nfp_repr_type { NFP_REPR_TYPE_PHYS_PORT, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2feb218c001d..6b66cd1aa0b9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -192,6 +192,9 @@ struct bpf_verifier_env { u32 subprog_cnt; }; +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...); + static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2b211262c25..3b2b47666180 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -169,11 +169,11 @@ struct bpf_call_arg_meta { static DEFINE_MUTEX(bpf_verifier_lock); /* log_level controls verbosity level of eBPF verifier. - * verbose() is used to dump the verification trace to the log, so the user - * can figure out what's wrong with the program + * bpf_verifier_log_write() is used to dump the verification trace to the log, + * so the user can figure out what's wrong with the program */ -static __printf(2, 3) void verbose(struct bpf_verifier_env *env, - const char *fmt, ...) +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...) { struct bpf_verifer_log *log = &env->log; unsigned int n; @@ -197,6 +197,14 @@ static __printf(2, 3) void verbose(struct bpf_verifier_env *env, else log->ubuf = NULL; } +EXPORT_SYMBOL_GPL(bpf_verifier_log_write); +/* Historically bpf_verifier_log_write was called verbose, but the name was too + * generic for symbol export. The function was renamed, but not the calls in + * the verifier to avoid complicating backports. Hence the alias below. + */ +static __printf(2, 3) void verbose(struct bpf_verifier_env *env, + const char *fmt, ...) + __attribute__((alias("bpf_verifier_log_write"))); static bool type_is_pkt_pointer(enum bpf_reg_type type) { @@ -375,6 +383,8 @@ static int realloc_func_state(struct bpf_func_state *state, int size, static void free_func_state(struct bpf_func_state *state) { + if (!state) + return; kfree(state->stack); kfree(state); } @@ -487,6 +497,8 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, } return &elem->st; err: + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; /* pop all elements and return */ while (!pop_stack(env, NULL, NULL)); return NULL; diff --git a/net/core/dev.c b/net/core/dev.c index 5cb782f074d7..3d24d9a59086 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7657,7 +7657,7 @@ err_rxq_info: /* Rollback successful reg's and free other resources */ while (i--) xdp_rxq_info_unreg(&rx[i].xdp_rxq); - kfree(dev->_rx); + kvfree(dev->_rx); dev->_rx = NULL; return err; } @@ -7665,16 +7665,15 @@ err_rxq_info: static void netif_free_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; - struct netdev_rx_queue *rx; /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ if (!dev->_rx) return; - rx = dev->_rx; - for (i = 0; i < count; i++) - xdp_rxq_info_unreg(&rx[i].xdp_rxq); + xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq); + + kvfree(dev->_rx); } static void netdev_init_one_queue(struct net_device *dev, diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3ff7a05bea9a..7f61a3d57fa7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o always += xdp_redirect_cpu_kern.o always += xdp_monitor_kern.o always += xdp_rxq_info_kern.o +always += xdp2skb_meta_kern.o always += syscall_tp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh new file mode 100755 index 000000000000..b9c9549c4c27 --- /dev/null +++ b/samples/bpf/xdp2skb_meta.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. +# +# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load +# eBPF programs, both for XDP and clsbpf. Shell script function +# wrappers and even long options parsing is illustrated, for ease of +# use. +# +# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs +# that need to collaborate between XDP and TC hooks. Thus, it is +# convenient that the same tool load both programs that need to work +# together. +# +BPF_FILE=xdp2skb_meta_kern.o +DIR=$(dirname $0) + +export TC=/usr/sbin/tc +export IP=/usr/sbin/ip + +function usage() { + echo "" + echo "Usage: $0 [-vfh] --dev ethX" + echo " -d | --dev : Network device (required)" + echo " --flush : Cleanup flush TC and XDP progs" + echo " --list : (\$LIST) List TC and XDP progs" + echo " -v | --verbose : (\$VERBOSE) Verbose" + echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)" + echo "" +} + +## -- General shell logging cmds -- +function err() { + local exitcode=$1 + shift + echo "ERROR: $@" >&2 + exit $exitcode +} + +function info() { + if [[ -n "$VERBOSE" ]]; then + echo "# $@" + fi +} + +## -- Helper function calls -- + +# Wrapper call for TC and IP +# - Will display the offending command on failure +function _call_cmd() { + local cmd="$1" + local allow_fail="$2" + shift 2 + if [[ -n "$VERBOSE" ]]; then + echo "$(basename $cmd) $@" + fi + if [[ -n "$DRYRUN" ]]; then + return + fi + $cmd "$@" + local status=$? + if (( $status != 0 )); then + if [[ "$allow_fail" == "" ]]; then + err 2 "Exec error($status) occurred cmd: \"$cmd $@\"" + fi + fi +} +function call_tc() { + _call_cmd "$TC" "" "$@" +} +function call_tc_allow_fail() { + _call_cmd "$TC" "allow_fail" "$@" +} +function call_ip() { + _call_cmd "$IP" "" "$@" +} + +## --- Parse command line arguments / parameters --- +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o vfhd: \ + --long verbose,flush,help,list,dev:,dry-run -- "$@") +if (( $? != 0 )); then + err 4 "Error calling getopt" +fi +eval set -- "$OPTIONS" + +unset DEV +unset FLUSH +while true; do + case "$1" in + -d | --dev ) # device + DEV=$2 + info "Device set to: DEV=$DEV" >&2 + shift 2 + ;; + -v | --verbose) + VERBOSE=yes + # info "Verbose mode: VERBOSE=$VERBOSE" >&2 + shift + ;; + --dry-run ) + DRYRUN=yes + VERBOSE=yes + info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2 + shift + ;; + -f | --flush ) + FLUSH=yes + shift + ;; + --list ) + LIST=yes + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +FILE="$DIR/$BPF_FILE" +if [[ ! -e $FILE ]]; then + err 3 "Missing BPF object file ($FILE)" +fi + +if [[ -z $DEV ]]; then + usage + err 2 "Please specify network device -- required option --dev" +fi + +## -- Function calls -- + +function list_tc() +{ + local device="$1" + shift + info "Listing current TC ingress rules" + call_tc filter show dev $device ingress +} + +function list_xdp() +{ + local device="$1" + shift + info "Listing current XDP device($device) setting" + call_ip link show dev $device | grep --color=auto xdp +} + +function flush_tc() +{ + local device="$1" + shift + info "Flush TC on device: $device" + call_tc_allow_fail filter del dev $device ingress + call_tc_allow_fail qdisc del dev $device clsact +} + +function flush_xdp() +{ + local device="$1" + shift + info "Flush XDP on device: $device" + call_ip link set dev $device xdp off +} + +function attach_tc_mark() +{ + local device="$1" + local file="$2" + local prog="tc_mark" + shift 2 + + # Re-attach clsact to clear/flush existing role + call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null + call_tc qdisc add dev $device clsact + + # Attach BPF prog + call_tc filter add dev $device ingress \ + prio 1 handle 1 bpf da obj $file sec $prog +} + +function attach_xdp_mark() +{ + local device="$1" + local file="$2" + local prog="xdp_mark" + shift 2 + + # Remove XDP prog in-case it's already loaded + # TODO: Need ip-link option to override/replace existing XDP prog + flush_xdp $device + + # Attach XDP/BPF prog + call_ip link set dev $device xdp obj $file sec $prog +} + +if [[ -n $FLUSH ]]; then + flush_tc $DEV + flush_xdp $DEV + exit 0 +fi + +if [[ -n $LIST ]]; then + list_tc $DEV + list_xdp $DEV + exit 0 +fi + +attach_tc_mark $DEV $FILE +attach_xdp_mark $DEV $FILE diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c new file mode 100644 index 000000000000..12e1024069c2 --- /dev/null +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + * + * Example howto transfer info from XDP to SKB, e.g. skb->mark + * ----------------------------------------------------------- + * This uses the XDP data_meta infrastructure, and is a cooperation + * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. + * + * Notice: This example does not use the BPF C-loader (bpf_load.c), + * but instead rely on the iproute2 TC tool for loading BPF-objects. + */ +#include <uapi/linux/bpf.h> +#include <uapi/linux/pkt_cls.h> + +#include "bpf_helpers.h" + +/* + * This struct is stored in the XDP 'data_meta' area, which is located + * just in-front-of the raw packet payload data. The meaning is + * specific to these two BPF programs that use it as a communication + * channel. XDP adjust/increase the area via a bpf-helper, and TC use + * boundary checks to see if data have been provided. + * + * The struct must be 4 byte aligned, which here is enforced by the + * struct __attribute__((aligned(4))). + */ +struct meta_info { + __u32 mark; +} __attribute__((aligned(4))); + +SEC("xdp_mark") +int _xdp_mark(struct xdp_md *ctx) +{ + struct meta_info *meta; + void *data, *data_end; + int ret; + + /* Reserve space in-front data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (ret < 0) + return XDP_ABORTED; + + /* For some unknown reason, these ctx pointers must be read + * after bpf_xdp_adjust_meta, else verifier will reject prog. + */ + data = (void *)(unsigned long)ctx->data; + + /* Check data_meta have room for meta_info struct */ + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) + return XDP_ABORTED; + + meta->mark = 42; + + return XDP_PASS; +} + +SEC("tc_mark") +int _tc_mark(struct __sk_buff *ctx) +{ + void *data = (void *)(unsigned long)ctx->data; + void *data_end = (void *)(unsigned long)ctx->data_end; + void *data_meta = (void *)(unsigned long)ctx->data_meta; + struct meta_info *meta = data_meta; + + /* Check XDP gave us some data_meta */ + if (meta + 1 > data) { + ctx->mark = 41; + /* Skip "accept" if no data_meta is avail */ + return TC_ACT_OK; + } + + /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */ + ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */ + + return TC_ACT_OK; +} + +/* Manually attaching these programs: +export DEV=ixgbe2 +export FILE=xdp2skb_meta_kern.o + +# via TC command +tc qdisc del dev $DEV clsact 2> /dev/null +tc qdisc add dev $DEV clsact +tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark +tc filter show dev $DEV ingress + +# XDP via IP command: +ip link set dev $DEV xdp off +ip link set dev $DEV xdp obj $FILE sec xdp_mark + +# Use iptable to "see" if SKBs are marked +iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29 +iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a + +# Hint: catch XDP_ABORTED errors via +perf record -e xdp:* +perf script + +*/ |