135 files changed, 4603 insertions, 1013 deletions
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index 38b4db8be7a2..44ca8ea5a723 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -5,10 +5,10 @@ BPF Documentation This directory contains documentation for the BPF (Berkeley Packet Filter) facility, with a focus on the extended BPF version (eBPF). -This kernel side documentation is still work in progress. The main +This kernel side documentation is still work in progress. The main textual documentation is (for historical reasons) described in -`Documentation/networking/filter.rst`_, which describe both classical -and extended BPF instruction-set. +:ref:`networking-filter`, which describe both classical and extended +BPF instruction-set. The Cilium project also maintains a `BPF and XDP Reference Guide`_ that goes into great technical depth about the BPF Architecture. @@ -48,6 +48,15 @@ Program types bpf_lsm +Map types +========= + +.. toctree:: + :maxdepth: 1 + + map_cgroup_storage + + Testing and debugging BPF ========================= @@ -59,7 +68,7 @@ Testing and debugging BPF .. Links: -.. _Documentation/networking/filter.rst: ../networking/filter.txt +.. _networking-filter: ../networking/filter.rst .. _man-pages: https://www.kernel.org/doc/man-pages/ -.. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html -.. _BPF and XDP Reference Guide: http://cilium.readthedocs.io/en/latest/bpf/ +.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html +.. _BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/ diff --git a/Documentation/bpf/map_cgroup_storage.rst b/Documentation/bpf/map_cgroup_storage.rst new file mode 100644 index 000000000000..cab9543017bf --- /dev/null +++ b/Documentation/bpf/map_cgroup_storage.rst @@ -0,0 +1,169 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. Copyright (C) 2020 Google LLC. + +=========================== +BPF_MAP_TYPE_CGROUP_STORAGE +=========================== + +The ``BPF_MAP_TYPE_CGROUP_STORAGE`` map type represents a local fix-sized +storage. It is only available with ``CONFIG_CGROUP_BPF``, and to programs that +attach to cgroups; the programs are made available by the same Kconfig. The +storage is identified by the cgroup the program is attached to. + +The map provide a local storage at the cgroup that the BPF program is attached +to. It provides a faster and simpler access than the general purpose hash +table, which performs a hash table lookups, and requires user to track live +cgroups on their own. + +This document describes the usage and semantics of the +``BPF_MAP_TYPE_CGROUP_STORAGE`` map type. Some of its behaviors was changed in +Linux 5.9 and this document will describe the differences. + +Usage +===== + +The map uses key of type of either ``__u64 cgroup_inode_id`` or +``struct bpf_cgroup_storage_key``, declared in ``linux/bpf.h``:: + + struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; + __u32 attach_type; + }; + +``cgroup_inode_id`` is the inode id of the cgroup directory. +``attach_type`` is the the program's attach type. + +Linux 5.9 added support for type ``__u64 cgroup_inode_id`` as the key type. +When this key type is used, then all attach types of the particular cgroup and +map will share the same storage. Otherwise, if the type is +``struct bpf_cgroup_storage_key``, then programs of different attach types +be isolated and see different storages. 
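A minimal sketch (not part of this patch; the helper name is invented) of how the choice between the two key layouts described above is made purely through ``key_size`` when creating the map with the raw ``bpf(2)`` syscall::

    /* Create a BPF_MAP_TYPE_CGROUP_STORAGE map.  A key_size of
     * sizeof(struct bpf_cgroup_storage_key) gives per-attach-type
     * storage; sizeof(__u64) (Linux 5.9+) gives attach-type shared
     * storage.  max_entries must be 0: the kernel sizes the map per
     * attachment.
     */
    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int create_cgroup_storage(int shared_across_attach_types)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.map_type    = BPF_MAP_TYPE_CGROUP_STORAGE;
            attr.key_size    = shared_across_attach_types ?
                               sizeof(__u64) :
                               sizeof(struct bpf_cgroup_storage_key);
            attr.value_size  = sizeof(__u32);
            attr.max_entries = 0;
            return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }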
+ +To access the storage in a program, use ``bpf_get_local_storage``:: + + void *bpf_get_local_storage(void *map, u64 flags) + +``flags`` is reserved for future use and must be 0. + +There is no implicit synchronization. Storages of ``BPF_MAP_TYPE_CGROUP_STORAGE`` +can be accessed by multiple programs across different CPUs, and user should +take care of synchronization by themselves. The bpf infrastructure provides +``struct bpf_spin_lock`` to synchronize the storage. See +``tools/testing/selftests/bpf/progs/test_spin_lock.c``. + +Examples +======== + +Usage with key type as ``struct bpf_cgroup_storage_key``:: + + #include <bpf/bpf.h> + + struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); + } cgroup_storage SEC(".maps"); + + int program(struct __sk_buff *skb) + { + __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0); + __sync_fetch_and_add(ptr, 1); + + return 0; + } + +Userspace accessing map declared above:: + + #include <linux/bpf.h> + #include <linux/libbpf.h> + + __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type) + { + struct bpf_cgroup_storage_key = { + .cgroup_inode_id = cgrp, + .attach_type = type, + }; + __u32 value; + bpf_map_lookup_elem(bpf_map__fd(map), &key, &value); + // error checking omitted + return value; + } + +Alternatively, using just ``__u64 cgroup_inode_id`` as key type:: + + #include <bpf/bpf.h> + + struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, __u64); + __type(value, __u32); + } cgroup_storage SEC(".maps"); + + int program(struct __sk_buff *skb) + { + __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0); + __sync_fetch_and_add(ptr, 1); + + return 0; + } + +And userspace:: + + #include <linux/bpf.h> + #include <linux/libbpf.h> + + __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type) + { + __u32 value; + bpf_map_lookup_elem(bpf_map__fd(map), &cgrp, &value); + // error checking omitted + return value; + } + +Semantics +========= + +``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE`` is a variant of this map type. This +per-CPU variant will have different memory regions for each CPU for each +storage. The non-per-CPU will have the same memory region for each storage. + +Prior to Linux 5.9, the lifetime of a storage is precisely per-attachment, and +for a single ``CGROUP_STORAGE`` map, there can be at most one program loaded +that uses the map. A program may be attached to multiple cgroups or have +multiple attach types, and each attach creates a fresh zeroed storage. The +storage is freed upon detach. + +There is a one-to-one association between the map of each type (per-CPU and +non-per-CPU) and the BPF program during load verification time. As a result, +each map can only be used by one BPF program and each BPF program can only use +one storage map of each type. Because of map can only be used by one BPF +program, sharing of this cgroup's storage with other BPF programs were +impossible. + +Since Linux 5.9, storage can be shared by multiple programs. When a program is +attached to a cgroup, the kernel would create a new storage only if the map +does not already contain an entry for the cgroup and attach type pair, or else +the old storage is reused for the new attachment. If the map is attach type +shared, then attach type is simply ignored during comparison. Storage is freed +only when either the map or the cgroup attached to is being freed. 
Detaching +will not directly free the storage, but it may cause the reference to the map +to reach zero and indirectly freeing all storage in the map. + +The map is not associated with any BPF program, thus making sharing possible. +However, the BPF program can still only associate with one map of each type +(per-CPU and non-per-CPU). A BPF program cannot use more than one +``BPF_MAP_TYPE_CGROUP_STORAGE`` or more than one +``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE``. + +In all versions, userspace may use the the attach parameters of cgroup and +attach type pair in ``struct bpf_cgroup_storage_key`` as the key to the BPF map +APIs to read or update the storage for a given attachment. For Linux 5.9 +attach type shared storages, only the first value in the struct, cgroup inode +id, is used during comparison, so userspace may just specify a ``__u64`` +directly. + +The storage is bound at attach time. Even if the program is attached to parent +and triggers in child, the storage still belongs to the parent. + +Userspace cannot create a new entry in the map or delete an existing entry. +Program test runs always use a temporary storage. diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index a1d3e192b9fa..debb59e374de 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -1,5 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0 +.. _networking-filter: + ======================================================= Linux Socket Filtering aka Berkeley Packet Filter (BPF) ======================================================= diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 56a4f68b262e..840a35ed92ec 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,5 +22,17 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE +#ifdef CONFIG_BPF_JIT +int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs); +#else /* !CONFIG_BPF_JIT */ +static inline +int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + return 0; +} +#endif /* !CONFIG_BPF_JIT */ + extern int fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 81e694af5f8c..eee1732ab6cd 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -11,8 +11,14 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; fixup = search_exception_tables(instruction_pointer(regs)); - if (fixup) - regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; + if (!fixup) + return 0; - return fixup != NULL; + if (IS_ENABLED(CONFIG_BPF_JIT) && + regs->pc >= BPF_JIT_REGION_START && + regs->pc < BPF_JIT_REGION_END) + return arm64_bpf_fixup_exception(fixup, regs); + + regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; + return 1; } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 3cb25b43b368..f8912e45be7a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> #include <linux/printk.h> @@ -56,6 +57,7 @@ struct jit_ctx { int idx; int epilogue_offset; int *offset; + int exentry_idx; __le32 *image; u32 stack_size; }; @@ -351,6 +353,67 @@ static void build_epilogue(struct jit_ctx *ctx) emit(A64_RET(A64_LR), ctx); } +#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) +#define 
BPF_FIXUP_REG_MASK GENMASK(31, 27) + +int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + + regs->regs[dst_reg] = 0; + regs->pc = (unsigned long)&ex->fixup - offset; + return 1; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + + if (!ctx->image) + /* First pass */ + return 0; + + if (BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (!ctx->prog->aux->extable || + WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->exentry_idx]; + pc = (unsigned long)&ctx->image[ctx->idx - 1]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->insn = offset; + + /* + * Since the extable follows the program, the fixup offset is always + * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value + * to keep things simple, and put the destination register in the upper + * bits. We don't need to worry about buildtime or runtime sort + * modifying the upper bits because the table is already sorted, and + * isn't part of the main exception table. + */ + offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + return -ERANGE; + + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | + FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + ctx->exentry_idx++; + return 0; +} + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. 
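To make the 27/5-bit split above concrete, here is a standalone user-space sketch (not kernel code; plain shifts stand in for FIELD_PREP()/FIELD_GET()) of packing and unpacking the ``fixup`` word::

    #include <stdint.h>
    #include <stdio.h>

    #define FIXUP_OFFSET_MASK 0x07ffffffu      /* GENMASK(26, 0) */
    #define FIXUP_REG_SHIFT   27               /* GENMASK(31, 27) */

    int main(void)
    {
            /* distance from the instruction after the faulting load to
             * &ex->fixup, stored as a positive value
             */
            uint32_t offset  = 0x1234;
            uint32_t dst_reg = 5;      /* register zeroed on the fault path */
            uint32_t fixup   = (offset & FIXUP_OFFSET_MASK) |
                               (dst_reg << FIXUP_REG_SHIFT);

            /* decode, as arm64_bpf_fixup_exception() does with FIELD_GET() */
            printf("offset=%#x dst_reg=%u\n",
                   fixup & FIXUP_OFFSET_MASK, fixup >> FIXUP_REG_SHIFT);
            return 0;
    }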
@@ -375,6 +438,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 jmp_cond, reg; s32 jmp_offset; u32 a64_insn; + int ret; #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -694,7 +758,6 @@ emit_cond_jmp: const u8 r0 = bpf2a64[BPF_REG_0]; bool func_addr_fixed; u64 func_addr; - int ret; ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); @@ -738,6 +801,10 @@ emit_cond_jmp: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: emit_a64_mov_i(1, tmp, off, ctx); switch (BPF_SIZE(code)) { case BPF_W: @@ -753,6 +820,10 @@ emit_cond_jmp: emit(A64_LDR64(dst, src, tmp), ctx); break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; /* ST: *(size *)(dst + off) = imm */ @@ -868,6 +939,9 @@ static int validate_code(struct jit_ctx *ctx) return -1; } + if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) + return -1; + return 0; } @@ -884,6 +958,7 @@ struct arm64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + int image_size, prog_size, extable_size; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; @@ -891,7 +966,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; - int image_size; u8 *image_ptr; if (!prog->jit_requested) @@ -922,7 +996,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image_ptr = jit_data->image; header = jit_data->header; extra_pass = true; - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); @@ -950,8 +1024,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + /* Now we know the actual image size. */ - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; + image_size = prog_size + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -962,8 +1040,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2. Now, the actual pass. */ ctx.image = (__le32 *)image_ptr; + if (extable_size) + prog->aux->extable = (void *)image_ptr + prog_size; skip_init_ctx: ctx.idx = 0; + ctx.exentry_idx = 0; build_prologue(&ctx, was_classic); @@ -984,7 +1065,7 @@ skip_init_ctx: /* And we're done. 
*/ if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx.image); + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); bpf_flush_icache(header, ctx.image + ctx.idx); @@ -1005,7 +1086,7 @@ skip_init_ctx: } prog->bpf_func = (void *)ctx.image; prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; if (!prog->is_func || extra_pass) { bpf_prog_fill_jited_linfo(prog, ctx.offset); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6478c1e0d137..2a6c9725e092 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -576,15 +576,9 @@ static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) */ static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) { - struct ena_adapter *adapter = netdev_priv(netdev); - switch (bpf->command) { case XDP_SETUP_PROG: return ena_xdp_set(netdev, bpf); - case XDP_QUERY_PROG: - bpf->prog_id = adapter->xdp_bpf_prog ? - adapter->xdp_bpf_prog->aux->id : 0; - break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 5e3b4a3b69ea..2704a4709bc7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -330,10 +330,6 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_SETUP_PROG: rc = bnxt_xdp_set(bp, xdp->prog); break; - case XDP_QUERY_PROG: - xdp->prog_id = bp->xdp_prog ? bp->xdp_prog->aux->id : 0; - rc = 0; - break; default: rc = -EINVAL; break; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 2ba0ce115e63..1c6163934e20 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1906,9 +1906,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return nicvf_xdp_setup(nic, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 50f52fe2012f..83b1e974bff0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2077,14 +2077,9 @@ out_err: static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp) { - struct dpaa2_eth_priv *priv = netdev_priv(dev); - switch (xdp->command) { case XDP_SETUP_PROG: return setup_xdp(dev, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0; - break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dadbfb3d2a2b..d8315811cbdf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12923,9 +12923,6 @@ static int i40e_xdp(struct net_device *dev, switch (xdp->command) { case XDP_SETUP_PROG: return i40e_xdp_setup(vsi, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = vsi->xdp_prog ? 
vsi->xdp_prog->aux->id : 0; - return 0; case XDP_SETUP_XSK_UMEM: return i40e_xsk_umem_setup(vsi, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 22e3d32463f1..8437d72795b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2549,9 +2549,6 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; - return 0; case XDP_SETUP_XSK_UMEM: return ice_xsk_umem_setup(vsi, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e339edd0b593..2f8a4cfc5fa1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10160,10 +10160,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ixgbe_xdp_setup(dev, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = adapter->xdp_prog ? - adapter->xdp_prog->aux->id : 0; - return 0; case XDP_SETUP_XSK_UMEM: return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index c3d26cc0cf51..a428113e6d54 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4477,15 +4477,9 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) { - struct ixgbevf_adapter *adapter = netdev_priv(dev); - switch (xdp->command) { case XDP_SETUP_PROG: return ixgbevf_xdp_setup(dev, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = adapter->xdp_prog ? - adapter->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c9b6b0f85bb0..832bbb8b05c8 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4442,14 +4442,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp) { - struct mvneta_port *pp = netdev_priv(dev); - switch (xdp->command) { case XDP_SETUP_PROG: return mvneta_xdp_setup(dev, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4298a029be55..2a8a5842eaef 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4657,9 +4657,6 @@ static int mvpp2_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return mvpp2_xdp_setup(port, xdp); - case XDP_QUERY_PROG: - xdp->prog_id = port->xdp_prog ? 
port->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 2b8608f8f0a9..106513f772c3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2802,35 +2802,11 @@ unlock_out: return err; } -static u32 mlx4_xdp_query(struct net_device *dev) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; - const struct bpf_prog *xdp_prog; - u32 prog_id = 0; - - if (!priv->tx_ring_num[TX_XDP]) - return prog_id; - - mutex_lock(&mdev->state_lock); - xdp_prog = rcu_dereference_protected( - priv->rx_ring[0]->xdp_prog, - lockdep_is_held(&mdev->state_lock)); - if (xdp_prog) - prog_id = xdp_prog->aux->id; - mutex_unlock(&mdev->state_lock); - - return prog_id; -} - static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return mlx4_xdp_set(dev, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = mlx4_xdp_query(dev); - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8f26cd951ff5..aebcf73f8546 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4418,29 +4418,11 @@ unlock: return err; } -static u32 mlx5e_xdp_query(struct net_device *dev) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - const struct bpf_prog *xdp_prog; - u32 prog_id = 0; - - mutex_lock(&priv->state_lock); - xdp_prog = priv->channels.params.xdp_prog; - if (xdp_prog) - prog_id = xdp_prog->aux->id; - mutex_unlock(&priv->state_lock); - - return prog_id; -} - static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return mlx5e_xdp_set(dev, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = mlx5e_xdp_query(dev); - return 0; case XDP_SETUP_XSK_UMEM: return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 44608873d3d9..39ee23e8c0bf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3614,10 +3614,6 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) return nfp_net_xdp_setup_drv(nn, xdp); case XDP_SETUP_PROG_HW: return nfp_net_xdp_setup_hw(nn, xdp); - case XDP_QUERY_PROG: - return xdp_attachment_query(&nn->xdp, xdp); - case XDP_QUERY_PROG_HW: - return xdp_attachment_query(&nn->xdp_hw, xdp); default: return nfp_app_bpf(nn->app, nn, xdp); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index b7d0b6ccebd3..f961f65d9372 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1045,9 +1045,6 @@ int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return qede_xdp_set(edev, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_id = edev->xdp_prog ? 
edev->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index f5aa1bd02f19..e06fa89f2d72 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -654,15 +654,10 @@ static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog) static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct efx_nic *efx = netdev_priv(dev); - struct bpf_prog *xdp_prog; switch (xdp->command) { case XDP_SETUP_PROG: return efx_xdp_setup_prog(efx, xdp->prog); - case XDP_QUERY_PROG: - xdp_prog = rtnl_dereference(efx->xdp_prog); - xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 0f366cc50b74..25db667fa879 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1811,9 +1811,6 @@ static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return netsec_xdp_setup(priv, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index a399f3659346..d6d7a7d9c7ad 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1286,9 +1286,6 @@ int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) case XDP_SETUP_PROG: return cpsw_xdp_prog_setup(priv, bpf); - case XDP_QUERY_PROG: - return xdp_attachment_query(&priv->xdpi, bpf); - default: return -EINVAL; } diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 8e4141552423..440486d9c999 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -163,16 +163,6 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) return ret; } -static u32 netvsc_xdp_query(struct netvsc_device *nvdev) -{ - struct bpf_prog *prog = netvsc_xdp_get(nvdev); - - if (prog) - return prog->aux->id; - - return 0; -} - int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct net_device_context *ndevctx = netdev_priv(dev); @@ -182,12 +172,7 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf) int ret; if (!nvdev || nvdev->destroy) { - if (bpf->command == XDP_QUERY_PROG) { - bpf->prog_id = 0; - return 0; /* Query must always succeed */ - } else { - return -ENODEV; - } + return -ENODEV; } switch (bpf->command) { @@ -208,10 +193,6 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf) return ret; - case XDP_QUERY_PROG: - bpf->prog_id = netvsc_xdp_query(nvdev); - return 0; - default: return -EINVAL; } diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 0b362b8dac17..2e90512f3bbe 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -551,10 +551,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) ASSERT_RTNL(); switch (bpf->command) { - case XDP_QUERY_PROG: - return xdp_attachment_query(&ns->xdp, bpf); - case XDP_QUERY_PROG_HW: - return xdp_attachment_query(&ns->xdp_hw, bpf); case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); if (err) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index d164052e0393..284f7092241d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ 
b/drivers/net/netdevsim/netdevsim.h @@ -121,7 +121,7 @@ static inline void nsim_bpf_uninit(struct netdevsim *ns) static inline int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) { - return bpf->command == XDP_QUERY_PROG ? 0 : -EOPNOTSUPP; + return -EOPNOTSUPP; } static inline int nsim_bpf_disable_tc(struct netdevsim *ns) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9b4b25358f9b..3c11a77f5709 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1184,26 +1184,11 @@ static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog, return 0; } -static u32 tun_xdp_query(struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - const struct bpf_prog *xdp_prog; - - xdp_prog = rtnl_dereference(tun->xdp_prog); - if (xdp_prog) - return xdp_prog->aux->id; - - return 0; -} - static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return tun_xdp_set(dev, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = tun_xdp_query(dev); - return 0; default: return -EINVAL; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index b594f03eeddb..e56cd562a664 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1198,26 +1198,11 @@ err: return err; } -static u32 veth_xdp_query(struct net_device *dev) -{ - struct veth_priv *priv = netdev_priv(dev); - const struct bpf_prog *xdp_prog; - - xdp_prog = priv->_xdp_prog; - if (xdp_prog) - return xdp_prog->aux->id; - - return 0; -} - static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return veth_xdp_set(dev, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = veth_xdp_query(dev); - return 0; default: return -EINVAL; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba38765dc490..6fa8fe5ef160 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2490,28 +2490,11 @@ err: return err; } -static u32 virtnet_xdp_query(struct net_device *dev) -{ - struct virtnet_info *vi = netdev_priv(dev); - const struct bpf_prog *xdp_prog; - int i; - - for (i = 0; i < vi->max_queue_pairs; i++) { - xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog); - if (xdp_prog) - return xdp_prog->aux->id; - } - return 0; -} - static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return virtnet_xdp_set(dev, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = virtnet_xdp_query(dev); - return 0; default: return -EINVAL; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a63e550c370e..458be6882b98 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1480,32 +1480,11 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return 0; } -static u32 xennet_xdp_query(struct net_device *dev) -{ - unsigned int num_queues = dev->real_num_tx_queues; - struct netfront_info *np = netdev_priv(dev); - const struct bpf_prog *xdp_prog; - struct netfront_queue *queue; - unsigned int i; - - for (i = 0; i < num_queues; ++i) { - queue = &np->queues[i]; - xdp_prog = rtnl_dereference(queue->xdp_prog); - if (xdp_prog) - return xdp_prog->aux->id; - } - - return 0; -} - static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return xennet_xdp_set(dev, xdp->prog, xdp->extack); - case XDP_QUERY_PROG: - xdp->prog_id = xennet_xdp_query(dev); - return 0; default: return -EINVAL; } diff --git 
a/fs/proc/proc_net.c b/fs/proc/proc_net.c index dba63b2429f0..ed8a6306990c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -98,7 +98,7 @@ static const struct proc_ops proc_net_seq_ops = { .proc_release = seq_release_net, }; -int bpf_iter_init_seq_net(void *priv_data) +int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux) { #ifdef CONFIG_NET_NS struct seq_net_private *p = priv_data; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 2c6f26670acc..64f367044e25 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -46,7 +46,8 @@ struct bpf_cgroup_storage { }; struct bpf_cgroup_storage_map *map; struct bpf_cgroup_storage_key key; - struct list_head list; + struct list_head list_map; + struct list_head list_cg; struct rb_node node; struct rcu_head rcu; }; @@ -78,6 +79,9 @@ struct cgroup_bpf { struct list_head progs[MAX_BPF_ATTACH_TYPE]; u32 flags[MAX_BPF_ATTACH_TYPE]; + /* list of cgroup shared storages */ + struct list_head storages; + /* temp storage for effective prog array used by prog_attach/detach */ struct bpf_prog_array *inactive; @@ -161,6 +165,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); } +struct bpf_cgroup_storage * +cgroup_storage_lookup(struct bpf_cgroup_storage_map *map, + void *key, bool locked); struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, enum bpf_cgroup_storage_type stype); void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); @@ -169,7 +176,6 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, enum bpf_attach_type type); void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map); -void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map); int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, @@ -383,8 +389,6 @@ static inline void bpf_cgroup_storage_set( struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } -static inline void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, - struct bpf_map *map) {} static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } static inline void bpf_cgroup_storage_free( diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bae557ff2da8..cef4ef0d2b4e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -33,10 +33,21 @@ struct btf; struct btf_type; struct exception_table_entry; struct seq_operations; +struct bpf_iter_aux_info; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; +typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +struct bpf_iter_seq_info { + const struct seq_operations *seq_ops; + bpf_iter_init_seq_priv_t init_seq_private; + bpf_iter_fini_seq_priv_t fini_seq_private; + u32 seq_priv_size; +}; + /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ @@ -96,6 +107,9 @@ struct bpf_map_ops { /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; + 
+ /* bpf_iter info used to open a seq_file */ + const struct bpf_iter_seq_info *iter_seq_info; }; struct bpf_map_memory { @@ -342,6 +356,10 @@ enum bpf_reg_type { PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ + PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ + PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ + PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ + PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -683,6 +701,8 @@ struct bpf_prog_aux { u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; + u32 max_rdonly_access; + u32 max_rdwr_access; const struct bpf_ctx_arg_aux *ctx_arg_info; struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ @@ -761,6 +781,33 @@ struct bpf_array_aux { struct work_struct work; }; +struct bpf_link { + atomic64_t refcnt; + u32 id; + enum bpf_link_type type; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; + struct work_struct work; +}; + +struct bpf_link_ops { + void (*release)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); + int (*detach)(struct bpf_link *link); + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); + void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); + int (*fill_link_info)(const struct bpf_link *link, + struct bpf_link_info *info); +}; + +struct bpf_link_primer { + struct bpf_link *link; + struct file *file; + int fd; + u32 id; +}; + struct bpf_struct_ops_value; struct btf_type; struct btf_member; @@ -1117,6 +1164,7 @@ int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); struct bpf_map *bpf_map_get_curr_or_next(u32 *id); +struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); extern int sysctl_unprivileged_bpf_disabled; @@ -1143,32 +1191,6 @@ static inline bool bpf_bypass_spec_v4(void) int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); -struct bpf_link { - atomic64_t refcnt; - u32 id; - enum bpf_link_type type; - const struct bpf_link_ops *ops; - struct bpf_prog *prog; - struct work_struct work; -}; - -struct bpf_link_primer { - struct bpf_link *link; - struct file *file; - int fd; - u32 id; -}; - -struct bpf_link_ops { - void (*release)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, - struct bpf_prog *old_prog); - void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); - int (*fill_link_info)(const struct bpf_link *link, - struct bpf_link_info *info); -}; - void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); @@ -1188,18 +1210,21 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } -typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); -typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +struct bpf_iter_aux_info { + struct bpf_map *map; +}; + +typedef int 
(*bpf_iter_check_target_t)(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - const struct seq_operations *seq_ops; - bpf_iter_init_seq_priv_t init_seq_private; - bpf_iter_fini_seq_priv_t fini_seq_private; - u32 seq_priv_size; + bpf_iter_check_target_t check_target; u32 ctx_arg_info_size; + enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; + const struct bpf_iter_seq_info *seq_info; }; struct bpf_iter_meta { @@ -1208,6 +1233,13 @@ struct bpf_iter_meta { u64 seq_num; }; +struct bpf_iter__bpf_map_elem { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(void *, key); + __bpf_md_ptr(void *, value); +}; + int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); @@ -1370,6 +1402,35 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } +static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, + struct bpf_prog *prog) +{ +} + +static inline int bpf_link_prime(struct bpf_link *link, + struct bpf_link_primer *primer) +{ + return -EOPNOTSUPP; +} + +static inline int bpf_link_settle(struct bpf_link_primer *primer) +{ + return -EOPNOTSUPP; +} + +static inline void bpf_link_cleanup(struct bpf_link_primer *primer) +{ +} + +static inline void bpf_link_inc(struct bpf_link *link) +{ +} + +static inline void bpf_link_put(struct bpf_link *link) +{ +} + static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; @@ -1644,6 +1705,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_stackid_proto_pe; +extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; diff --git a/include/linux/filter.h b/include/linux/filter.h index d07a6e973a7d..0a355b005bf4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -533,7 +533,8 @@ struct bpf_prog { is_func:1, /* program is a bpf function */ kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? */ - enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */ + enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ + call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba0fa6b22787..88d40b9abaa1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -876,8 +876,6 @@ enum bpf_netdev_command { */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, - XDP_QUERY_PROG, - XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. 
*/ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, @@ -888,6 +886,19 @@ struct bpf_prog_offload_ops; struct netlink_ext_ack; struct xdp_umem; struct xdp_dev_bulk_queue; +struct bpf_xdp_link; + +enum bpf_xdp_mode { + XDP_MODE_SKB = 0, + XDP_MODE_DRV = 1, + XDP_MODE_HW = 2, + __MAX_XDP_MODE +}; + +struct bpf_xdp_entity { + struct bpf_prog *prog; + struct bpf_xdp_link *link; +}; struct netdev_bpf { enum bpf_netdev_command command; @@ -898,12 +909,6 @@ struct netdev_bpf { struct bpf_prog *prog; struct netlink_ext_ack *extack; }; - /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */ - struct { - u32 prog_id; - /* flags with which program was installed */ - u32 prog_flags; - }; /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */ struct { struct bpf_offloaded_map *offmap; @@ -2144,6 +2149,9 @@ struct net_device { #endif const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + + /* protected by rtnl_lock */ + struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3821,8 +3829,9 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, - enum bpf_netdev_command cmd); +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); + int xdp_umem_query(struct net_device *dev, u16 queue_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d1eed1b43651..2df965cd0974 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -133,7 +133,8 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo void *data); extern struct pid *tgid_pidfd_to_pid(const struct file *file); -extern int bpf_iter_init_seq_net(void *priv_data); +struct bpf_iter_aux_info; +extern int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux); extern void bpf_iter_fini_seq_net(void *priv_data); #ifdef CONFIG_PROC_PID_ARCH_STATUS diff --git a/include/net/xdp.h b/include/net/xdp.h index dbe9c60797e1..3814fb631d52 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -240,8 +240,6 @@ struct xdp_attachment_info { }; struct netdev_bpf; -int xdp_attachment_query(struct xdp_attachment_info *info, - struct netdev_bpf *bpf); bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, struct netdev_bpf *bpf); void xdp_attachment_setup(struct xdp_attachment_info *info, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54d0c886e3ba..b134e679e9db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -117,6 +117,7 @@ enum bpf_cmd { BPF_LINK_GET_NEXT_ID, BPF_ENABLE_STATS, BPF_ITER_CREATE, + BPF_LINK_DETACH, }; enum bpf_map_type { @@ -230,6 +231,7 @@ enum bpf_attach_type { BPF_CGROUP_INET_SOCK_RELEASE, BPF_XDP_CPUMAP, BPF_SK_LOOKUP, + BPF_XDP, __MAX_BPF_ATTACH_TYPE }; @@ -242,10 +244,18 @@ enum bpf_link_type { BPF_LINK_TYPE_CGROUP = 3, BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, + BPF_LINK_TYPE_XDP = 6, MAX_BPF_LINK_TYPE, }; +enum bpf_iter_link_info { + BPF_ITER_LINK_UNSPEC = 0, + BPF_ITER_LINK_MAP_FD = 1, + + MAX_BPF_ITER_LINK_INFO, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No 
further bpf programs allowed in the subtree. @@ -607,7 +617,10 @@ union bpf_attr { struct { /* struct used by BPF_LINK_CREATE command */ __u32 prog_fd; /* eBPF program to attach */ - __u32 target_fd; /* object to attach to */ + union { + __u32 target_fd; /* object to attach to */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ } link_create; @@ -622,6 +635,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { + __u32 link_fd; + } link_detach; + struct { /* struct used by BPF_ENABLE_STATS command */ __u32 type; } enable_stats; @@ -3229,7 +3246,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification @@ -4057,6 +4074,9 @@ struct bpf_link_info { __u32 netns_ino; __u32 attach_type; } netns; + struct { + __u32 ifindex; + } xdp; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 1131a921e1a6..e6eb9c0402da 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,7 +2,7 @@ obj-y := core.o CFLAGS_core.o += $(call cc-disable-warning, override-init) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c66e8273fccd..8ff419b632a6 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -487,6 +487,142 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + pgoff); } +struct bpf_iter_seq_array_map_info { + struct bpf_map *map; + void *percpu_value_buf; + u32 index; +}; + +static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_map *map = info->map; + struct bpf_array *array; + u32 index; + + if (info->index >= map->max_entries) + return NULL; + + if (*pos == 0) + ++*pos; + array = container_of(map, struct bpf_array, map); + index = info->index & array->index_mask; + if (info->percpu_value_buf) + return array->pptrs[index]; + return array->value + array->elem_size * index; +} + +static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_map *map = info->map; + struct bpf_array *array; + u32 index; + + ++*pos; + ++info->index; + if (info->index >= map->max_entries) + return NULL; + + array = container_of(map, struct bpf_array, map); + index = info->index & array->index_mask; + if (info->percpu_value_buf) + return array->pptrs[index]; + return array->value + array->elem_size * index; +} + +static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_iter__bpf_map_elem ctx = {}; + struct bpf_map *map = info->map; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int off = 0, 
cpu = 0; + void __percpu **pptr; + u32 size; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, v == NULL); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.map = info->map; + if (v) { + ctx.key = &info->index; + + if (!info->percpu_value_buf) { + ctx.value = v; + } else { + pptr = v; + size = round_up(map->value_size, 8); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu), + size); + off += size; + } + ctx.value = info->percpu_value_buf; + } + } + + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_array_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_array_map_seq_show(seq, v); +} + +static void bpf_array_map_seq_stop(struct seq_file *seq, void *v) +{ + if (!v) + (void)__bpf_array_map_seq_show(seq, NULL); +} + +static int bpf_iter_init_array_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + struct bpf_map *map = aux->map; + void *value_buf; + u32 buf_size; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); + if (!value_buf) + return -ENOMEM; + + seq_info->percpu_value_buf = value_buf; + } + + seq_info->map = map; + return 0; +} + +static void bpf_iter_fini_array_map(void *priv_data) +{ + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + + kfree(seq_info->percpu_value_buf); +} + +static const struct seq_operations bpf_array_map_seq_ops = { + .start = bpf_array_map_seq_start, + .next = bpf_array_map_seq_next, + .stop = bpf_array_map_seq_stop, + .show = bpf_array_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_array_map_seq_ops, + .init_seq_private = bpf_iter_init_array_map, + .fini_seq_private = bpf_iter_fini_array_map, + .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), +}; + static int array_map_btf_id; const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, @@ -506,6 +642,7 @@ const struct bpf_map_ops array_map_ops = { .map_update_batch = generic_map_update_batch, .map_btf_name = "bpf_array", .map_btf_id = &array_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int percpu_array_map_btf_id; @@ -521,6 +658,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_check_btf = array_map_check_btf, .map_btf_name = "bpf_array", .map_btf_id = &percpu_array_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int fd_array_map_alloc_check(union bpf_attr *attr) diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index dd612b80b9fe..363b9cafc2d8 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -14,11 +14,13 @@ struct bpf_iter_target_info { struct bpf_iter_link { struct bpf_link link; + struct bpf_iter_aux_info aux; struct bpf_iter_target_info *tinfo; }; struct bpf_iter_priv_data { struct bpf_iter_target_info *tinfo; + const struct bpf_iter_seq_info *seq_info; struct bpf_prog *prog; u64 session_id; u64 seq_num; @@ -35,7 +37,8 @@ static DEFINE_MUTEX(link_mutex); /* incremented on every opened seq_file */ static atomic64_t session_id; -static int prepare_seq_file(struct file *file, struct bpf_iter_link *link); +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, + const struct bpf_iter_seq_info *seq_info); static void bpf_iter_inc_seq_num(struct seq_file *seq) { @@ -199,11 +202,25 @@ done: return copied; } +static const struct bpf_iter_seq_info * +__get_seq_info(struct 
bpf_iter_link *link) +{ + const struct bpf_iter_seq_info *seq_info; + + if (link->aux.map) { + seq_info = link->aux.map->ops->iter_seq_info; + if (seq_info) + return seq_info; + } + + return link->tinfo->reg_info->seq_info; +} + static int iter_open(struct inode *inode, struct file *file) { struct bpf_iter_link *link = inode->i_private; - return prepare_seq_file(file, link); + return prepare_seq_file(file, link, __get_seq_info(link)); } static int iter_release(struct inode *inode, struct file *file) @@ -218,8 +235,8 @@ static int iter_release(struct inode *inode, struct file *file) iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); - if (iter_priv->tinfo->reg_info->fini_seq_private) - iter_priv->tinfo->reg_info->fini_seq_private(seq->private); + if (iter_priv->seq_info->fini_seq_private) + iter_priv->seq_info->fini_seq_private(seq->private); bpf_prog_put(iter_priv->prog); seq->private = iter_priv; @@ -318,6 +335,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog) static void bpf_iter_link_release(struct bpf_link *link) { + struct bpf_iter_link *iter_link = + container_of(link, struct bpf_iter_link, link); + + if (iter_link->aux.map) + bpf_map_put_with_uref(iter_link->aux.map); } static void bpf_iter_link_dealloc(struct bpf_link *link) @@ -370,14 +392,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_link_primer link_primer; struct bpf_iter_target_info *tinfo; + struct bpf_iter_aux_info aux = {}; struct bpf_iter_link *link; + u32 prog_btf_id, target_fd; bool existed = false; - u32 prog_btf_id; + struct bpf_map *map; int err; - if (attr->link_create.target_fd || attr->link_create.flags) - return -EINVAL; - prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { @@ -390,6 +411,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!existed) return -ENOENT; + /* Make sure user supplied flags are target expected. 
*/ + target_fd = attr->link_create.target_fd; + if (attr->link_create.flags != tinfo->reg_info->req_linfo) + return -EINVAL; + if (!attr->link_create.flags && target_fd) + return -EINVAL; + link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; @@ -403,21 +431,45 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } + if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) { + map = bpf_map_get_with_uref(target_fd); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto cleanup_link; + } + + aux.map = map; + err = tinfo->reg_info->check_target(prog, &aux); + if (err) { + bpf_map_put_with_uref(map); + goto cleanup_link; + } + + link->aux.map = map; + } + return bpf_link_settle(&link_primer); + +cleanup_link: + bpf_link_cleanup(&link_primer); + return err; } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, struct bpf_iter_target_info *tinfo, + const struct bpf_iter_seq_info *seq_info, struct bpf_prog *prog) { priv_data->tinfo = tinfo; + priv_data->seq_info = seq_info; priv_data->prog = prog; priv_data->session_id = atomic64_inc_return(&session_id); priv_data->seq_num = 0; priv_data->done_stop = false; } -static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, + const struct bpf_iter_seq_info *seq_info) { struct bpf_iter_priv_data *priv_data; struct bpf_iter_target_info *tinfo; @@ -433,21 +485,21 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) tinfo = link->tinfo; total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + - tinfo->reg_info->seq_priv_size; - priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops, + seq_info->seq_priv_size; + priv_data = __seq_open_private(file, seq_info->seq_ops, total_priv_dsize); if (!priv_data) { err = -ENOMEM; goto release_prog; } - if (tinfo->reg_info->init_seq_private) { - err = tinfo->reg_info->init_seq_private(priv_data->target_private); + if (seq_info->init_seq_private) { + err = seq_info->init_seq_private(priv_data->target_private, &link->aux); if (err) goto release_seq_file; } - init_seq_meta(priv_data, tinfo, prog); + init_seq_meta(priv_data, tinfo, seq_info, prog); seq = file->private_data; seq->private = priv_data->target_private; @@ -463,6 +515,7 @@ release_prog: int bpf_iter_new_fd(struct bpf_link *link) { + struct bpf_iter_link *iter_link; struct file *file; unsigned int flags; int err, fd; @@ -481,8 +534,8 @@ int bpf_iter_new_fd(struct bpf_link *link) goto free_fd; } - err = prepare_seq_file(file, - container_of(link, struct bpf_iter_link, link)); + iter_link = container_of(link, struct bpf_iter_link, link); + err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); if (err) goto free_file; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4ff0de860ab7..91afdd4c82e3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3806,6 +3806,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, btf_kind_str[BTF_INFO_KIND(t->info)]); return false; } + + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ + for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { + const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; + + if (ctx_arg_info->offset == off && + (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL || + ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) { + info->reg_type = ctx_arg_info->reg_type; + return true; + } + } + if (t->type == 0) /* This is a 
pointer to void. * It is the same as scalar from the verifier safety pov. diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ac53102e244a..83ff127ef7ae 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -37,17 +37,34 @@ static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[]) } static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[], - struct bpf_prog *prog) + struct bpf_cgroup_storage *new_storages[], + enum bpf_attach_type type, + struct bpf_prog *prog, + struct cgroup *cgrp) { enum bpf_cgroup_storage_type stype; + struct bpf_cgroup_storage_key key; + struct bpf_map *map; + + key.cgroup_inode_id = cgroup_id(cgrp); + key.attach_type = type; for_each_cgroup_storage_type(stype) { + map = prog->aux->cgroup_storage[stype]; + if (!map) + continue; + + storages[stype] = cgroup_storage_lookup((void *)map, &key, false); + if (storages[stype]) + continue; + storages[stype] = bpf_cgroup_storage_alloc(prog, stype); if (IS_ERR(storages[stype])) { - storages[stype] = NULL; - bpf_cgroup_storages_free(storages); + bpf_cgroup_storages_free(new_storages); return -ENOMEM; } + + new_storages[stype] = storages[stype]; } return 0; @@ -63,7 +80,7 @@ static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[], } static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[], - struct cgroup* cgrp, + struct cgroup *cgrp, enum bpf_attach_type attach_type) { enum bpf_cgroup_storage_type stype; @@ -72,14 +89,6 @@ static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[], bpf_cgroup_storage_link(storages[stype], cgrp, attach_type); } -static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[]) -{ - enum bpf_cgroup_storage_type stype; - - for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_unlink(storages[stype]); -} - /* Called when bpf_cgroup_link is auto-detached from dying cgroup. * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. 
It * doesn't free link memory, which will eventually be done by bpf_link's @@ -101,22 +110,23 @@ static void cgroup_bpf_release(struct work_struct *work) struct cgroup *p, *cgrp = container_of(work, struct cgroup, bpf.release_work); struct bpf_prog_array *old_array; + struct list_head *storages = &cgrp->bpf.storages; + struct bpf_cgroup_storage *storage, *stmp; + unsigned int type; mutex_lock(&cgroup_mutex); for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { struct list_head *progs = &cgrp->bpf.progs[type]; - struct bpf_prog_list *pl, *tmp; + struct bpf_prog_list *pl, *pltmp; - list_for_each_entry_safe(pl, tmp, progs, node) { + list_for_each_entry_safe(pl, pltmp, progs, node) { list_del(&pl->node); if (pl->prog) bpf_prog_put(pl->prog); if (pl->link) bpf_cgroup_link_auto_detach(pl->link); - bpf_cgroup_storages_unlink(pl->storage); - bpf_cgroup_storages_free(pl->storage); kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } @@ -126,6 +136,11 @@ static void cgroup_bpf_release(struct work_struct *work) bpf_prog_array_free(old_array); } + list_for_each_entry_safe(storage, stmp, storages, list_cg) { + bpf_cgroup_storage_unlink(storage); + bpf_cgroup_storage_free(storage); + } + mutex_unlock(&cgroup_mutex); for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) @@ -290,6 +305,8 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) for (i = 0; i < NR; i++) INIT_LIST_HEAD(&cgrp->bpf.progs[i]); + INIT_LIST_HEAD(&cgrp->bpf.storages); + for (i = 0; i < NR; i++) if (compute_effective_progs(cgrp, i, &arrays[i])) goto cleanup; @@ -422,7 +439,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct list_head *progs = &cgrp->bpf.progs[type]; struct bpf_prog *old_prog = NULL; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; - struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; + struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; struct bpf_prog_list *pl; int err; @@ -455,17 +472,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, if (IS_ERR(pl)) return PTR_ERR(pl); - if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog)) + if (bpf_cgroup_storages_alloc(storage, new_storage, type, + prog ? 
: link->link.prog, cgrp)) return -ENOMEM; if (pl) { old_prog = pl->prog; - bpf_cgroup_storages_unlink(pl->storage); - bpf_cgroup_storages_assign(old_storage, pl->storage); } else { pl = kmalloc(sizeof(*pl), GFP_KERNEL); if (!pl) { - bpf_cgroup_storages_free(storage); + bpf_cgroup_storages_free(new_storage); return -ENOMEM; } list_add_tail(&pl->node, progs); @@ -480,12 +496,11 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, if (err) goto cleanup; - bpf_cgroup_storages_free(old_storage); if (old_prog) bpf_prog_put(old_prog); else static_branch_inc(&cgroup_bpf_enabled_key); - bpf_cgroup_storages_link(pl->storage, cgrp, type); + bpf_cgroup_storages_link(new_storage, cgrp, type); return 0; cleanup: @@ -493,9 +508,7 @@ cleanup: pl->prog = old_prog; pl->link = NULL; } - bpf_cgroup_storages_free(pl->storage); - bpf_cgroup_storages_assign(pl->storage, old_storage); - bpf_cgroup_storages_link(pl->storage, cgrp, type); + bpf_cgroup_storages_free(new_storage); if (!old_prog) { list_del(&pl->node); kfree(pl); @@ -679,8 +692,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, /* now can actually delete it from this cgroup list */ list_del(&pl->node); - bpf_cgroup_storages_unlink(pl->storage); - bpf_cgroup_storages_free(pl->storage); kfree(pl); if (list_empty(progs)) /* last program was detached, reset flags to zero */ @@ -803,6 +814,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link) { struct bpf_cgroup_link *cg_link = container_of(link, struct bpf_cgroup_link, link); + struct cgroup *cg; /* link might have been auto-detached by dying cgroup already, * in that case our work is done here @@ -821,8 +833,12 @@ static void bpf_cgroup_link_release(struct bpf_link *link) WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link, cg_link->type)); + cg = cg_link->cgroup; + cg_link->cgroup = NULL; + mutex_unlock(&cgroup_mutex); - cgroup_put(cg_link->cgroup); + + cgroup_put(cg); } static void bpf_cgroup_link_dealloc(struct bpf_link *link) @@ -833,6 +849,13 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link) kfree(cg_link); } +static int bpf_cgroup_link_detach(struct bpf_link *link) +{ + bpf_cgroup_link_release(link); + + return 0; +} + static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { @@ -872,6 +895,7 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_cgroup_link_lops = { .release = bpf_cgroup_link_release, .dealloc = bpf_cgroup_link_dealloc, + .detach = bpf_cgroup_link_detach, .update_prog = cgroup_bpf_replace, .show_fdinfo = bpf_cgroup_link_show_fdinfo, .fill_link_info = bpf_cgroup_link_fill_link_info, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7be02e555ab9..bde93344164d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2097,24 +2097,12 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array, : 0; } -static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux) -{ - enum bpf_cgroup_storage_type stype; - - for_each_cgroup_storage_type(stype) { - if (!aux->cgroup_storage[stype]) - continue; - bpf_cgroup_storage_release(aux, aux->cgroup_storage[stype]); - } -} - void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len) { struct bpf_map *map; u32 i; - bpf_free_cgroup_storage(aux); for (i = 0; i < len; i++) { map = used_maps[i]; if (map->ops->map_poke_untrack) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 7b136953f214..78dfff6a501b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ 
-1618,6 +1618,196 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, true, false); } +struct bpf_iter_seq_hash_map_info { + struct bpf_map *map; + struct bpf_htab *htab; + void *percpu_value_buf; // non-zero means percpu hash + unsigned long flags; + u32 bucket_id; + u32 skip_elems; +}; + +static struct htab_elem * +bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, + struct htab_elem *prev_elem) +{ + const struct bpf_htab *htab = info->htab; + unsigned long flags = info->flags; + u32 skip_elems = info->skip_elems; + u32 bucket_id = info->bucket_id; + struct hlist_nulls_head *head; + struct hlist_nulls_node *n; + struct htab_elem *elem; + struct bucket *b; + u32 i, count; + + if (bucket_id >= htab->n_buckets) + return NULL; + + /* try to find next elem in the same bucket */ + if (prev_elem) { + /* no update/deletion on this bucket, prev_elem should be still valid + * and we won't skip elements. + */ + n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); + elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); + if (elem) + return elem; + + /* not found, unlock and go to the next bucket */ + b = &htab->buckets[bucket_id++]; + htab_unlock_bucket(htab, b, flags); + skip_elems = 0; + } + + for (i = bucket_id; i < htab->n_buckets; i++) { + b = &htab->buckets[i]; + flags = htab_lock_bucket(htab, b); + + count = 0; + head = &b->head; + hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + if (count >= skip_elems) { + info->flags = flags; + info->bucket_id = i; + info->skip_elems = count; + return elem; + } + count++; + } + + htab_unlock_bucket(htab, b, flags); + skip_elems = 0; + } + + info->bucket_id = i; + info->skip_elems = 0; + return NULL; +} + +static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + struct htab_elem *elem; + + elem = bpf_hash_map_seq_find_next(info, NULL); + if (!elem) + return NULL; + + if (*pos == 0) + ++*pos; + return elem; +} + +static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + + ++*pos; + ++info->skip_elems; + return bpf_hash_map_seq_find_next(info, v); +} + +static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + u32 roundup_key_size, roundup_value_size; + struct bpf_iter__bpf_map_elem ctx = {}; + struct bpf_map *map = info->map; + struct bpf_iter_meta meta; + int ret = 0, off = 0, cpu; + struct bpf_prog *prog; + void __percpu *pptr; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, elem == NULL); + if (prog) { + ctx.meta = &meta; + ctx.map = info->map; + if (elem) { + roundup_key_size = round_up(map->key_size, 8); + ctx.key = elem->key; + if (!info->percpu_value_buf) { + ctx.value = elem->key + roundup_key_size; + } else { + roundup_value_size = round_up(map->value_size, 8); + pptr = htab_elem_get_ptr(elem, map->key_size); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu), + roundup_value_size); + off += roundup_value_size; + } + ctx.value = info->percpu_value_buf; + } + } + ret = bpf_iter_run_prog(prog, &ctx); + } + + return ret; +} + +static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_hash_map_seq_show(seq, v); +} + +static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + + if (!v) + 
(void)__bpf_hash_map_seq_show(seq, NULL); + else + htab_unlock_bucket(info->htab, + &info->htab->buckets[info->bucket_id], + info->flags); +} + +static int bpf_iter_init_hash_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_hash_map_info *seq_info = priv_data; + struct bpf_map *map = aux->map; + void *value_buf; + u32 buf_size; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); + if (!value_buf) + return -ENOMEM; + + seq_info->percpu_value_buf = value_buf; + } + + seq_info->map = map; + seq_info->htab = container_of(map, struct bpf_htab, map); + return 0; +} + +static void bpf_iter_fini_hash_map(void *priv_data) +{ + struct bpf_iter_seq_hash_map_info *seq_info = priv_data; + + kfree(seq_info->percpu_value_buf); +} + +static const struct seq_operations bpf_hash_map_seq_ops = { + .start = bpf_hash_map_seq_start, + .next = bpf_hash_map_seq_next, + .stop = bpf_hash_map_seq_stop, + .show = bpf_hash_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_hash_map_seq_ops, + .init_seq_private = bpf_iter_init_hash_map, + .fini_seq_private = bpf_iter_fini_hash_map, + .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), +}; + static int htab_map_btf_id; const struct bpf_map_ops htab_map_ops = { .map_alloc_check = htab_map_alloc_check, @@ -1632,6 +1822,7 @@ const struct bpf_map_ops htab_map_ops = { BATCH_OPS(htab), .map_btf_name = "bpf_htab", .map_btf_id = &htab_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int htab_lru_map_btf_id; @@ -1649,6 +1840,7 @@ const struct bpf_map_ops htab_lru_map_ops = { BATCH_OPS(htab_lru), .map_btf_name = "bpf_htab", .map_btf_id = &htab_lru_map_btf_id, + .iter_seq_info = &iter_seq_info, }; /* Called from eBPF program */ @@ -1766,6 +1958,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { BATCH_OPS(htab_percpu), .map_btf_name = "bpf_htab", .map_btf_id = &htab_percpu_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int htab_lru_percpu_map_btf_id; @@ -1781,6 +1974,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { BATCH_OPS(htab_lru_percpu), .map_btf_name = "bpf_htab", .map_btf_id = &htab_lru_percpu_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int fd_htab_map_alloc_check(union bpf_attr *attr) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 51bd5a8cb01b..571bb351ed3b 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -13,6 +13,8 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO #ifdef CONFIG_CGROUP_BPF +#include "../cgroup/cgroup-internal.h" + #define LOCAL_STORAGE_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) @@ -20,7 +22,6 @@ struct bpf_cgroup_storage_map { struct bpf_map map; spinlock_t lock; - struct bpf_prog_aux *aux; struct rb_root root; struct list_head list; }; @@ -30,24 +31,41 @@ static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map) return container_of(map, struct bpf_cgroup_storage_map, map); } -static int bpf_cgroup_storage_key_cmp( - const struct bpf_cgroup_storage_key *key1, - const struct bpf_cgroup_storage_key *key2) +static bool attach_type_isolated(const struct bpf_map *map) { - if (key1->cgroup_inode_id < key2->cgroup_inode_id) - return -1; - else if (key1->cgroup_inode_id > key2->cgroup_inode_id) - return 1; - else if (key1->attach_type < 
key2->attach_type) - return -1; - else if (key1->attach_type > key2->attach_type) - return 1; + return map->key_size == sizeof(struct bpf_cgroup_storage_key); +} + +static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map, + const void *_key1, const void *_key2) +{ + if (attach_type_isolated(&map->map)) { + const struct bpf_cgroup_storage_key *key1 = _key1; + const struct bpf_cgroup_storage_key *key2 = _key2; + + if (key1->cgroup_inode_id < key2->cgroup_inode_id) + return -1; + else if (key1->cgroup_inode_id > key2->cgroup_inode_id) + return 1; + else if (key1->attach_type < key2->attach_type) + return -1; + else if (key1->attach_type > key2->attach_type) + return 1; + } else { + const __u64 *cgroup_inode_id1 = _key1; + const __u64 *cgroup_inode_id2 = _key2; + + if (*cgroup_inode_id1 < *cgroup_inode_id2) + return -1; + else if (*cgroup_inode_id1 > *cgroup_inode_id2) + return 1; + } return 0; } -static struct bpf_cgroup_storage *cgroup_storage_lookup( - struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key, - bool locked) +struct bpf_cgroup_storage * +cgroup_storage_lookup(struct bpf_cgroup_storage_map *map, + void *key, bool locked) { struct rb_root *root = &map->root; struct rb_node *node; @@ -61,7 +79,7 @@ static struct bpf_cgroup_storage *cgroup_storage_lookup( storage = container_of(node, struct bpf_cgroup_storage, node); - switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) { + switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) { case -1: node = node->rb_left; break; @@ -93,7 +111,7 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, this = container_of(*new, struct bpf_cgroup_storage, node); parent = *new; - switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) { + switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) { case -1: new = &((*new)->rb_left); break; @@ -111,10 +129,9 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, return 0; } -static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) +static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key) { struct bpf_cgroup_storage_map *map = map_to_storage(_map); - struct bpf_cgroup_storage_key *key = _key; struct bpf_cgroup_storage *storage; storage = cgroup_storage_lookup(map, key, false); @@ -124,17 +141,13 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) return &READ_ONCE(storage->buf)->data[0]; } -static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, +static int cgroup_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { - struct bpf_cgroup_storage_key *key = _key; struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST))) - return -EINVAL; - - if (unlikely(flags & BPF_NOEXIST)) + if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST))) return -EINVAL; if (unlikely((flags & BPF_F_LOCK) && @@ -167,11 +180,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, return 0; } -int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key, +int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key, void *value) { struct bpf_cgroup_storage_map *map = map_to_storage(_map); - struct bpf_cgroup_storage_key *key = _key; struct bpf_cgroup_storage *storage; int cpu, off = 0; u32 size; @@ -197,11 +209,10 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key, return 0; } -int 
bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key, +int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key, void *value, u64 map_flags) { struct bpf_cgroup_storage_map *map = map_to_storage(_map); - struct bpf_cgroup_storage_key *key = _key; struct bpf_cgroup_storage *storage; int cpu, off = 0; u32 size; @@ -232,12 +243,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key, return 0; } -static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, +static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key, void *_next_key) { struct bpf_cgroup_storage_map *map = map_to_storage(_map); - struct bpf_cgroup_storage_key *key = _key; - struct bpf_cgroup_storage_key *next = _next_key; struct bpf_cgroup_storage *storage; spin_lock_bh(&map->lock); @@ -250,17 +259,23 @@ static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, if (!storage) goto enoent; - storage = list_next_entry(storage, list); + storage = list_next_entry(storage, list_map); if (!storage) goto enoent; } else { storage = list_first_entry(&map->list, - struct bpf_cgroup_storage, list); + struct bpf_cgroup_storage, list_map); } spin_unlock_bh(&map->lock); - next->attach_type = storage->key.attach_type; - next->cgroup_inode_id = storage->key.cgroup_inode_id; + + if (attach_type_isolated(&map->map)) { + struct bpf_cgroup_storage_key *next = _next_key; + *next = storage->key; + } else { + __u64 *next = _next_key; + *next = storage->key.cgroup_inode_id; + } return 0; enoent: @@ -275,7 +290,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) struct bpf_map_memory mem; int ret; - if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) + if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) && + attr->key_size != sizeof(__u64)) return ERR_PTR(-EINVAL); if (attr->value_size == 0) @@ -318,6 +334,17 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) static void cgroup_storage_map_free(struct bpf_map *_map) { struct bpf_cgroup_storage_map *map = map_to_storage(_map); + struct list_head *storages = &map->list; + struct bpf_cgroup_storage *storage, *stmp; + + mutex_lock(&cgroup_mutex); + + list_for_each_entry_safe(storage, stmp, storages, list_map) { + bpf_cgroup_storage_unlink(storage); + bpf_cgroup_storage_free(storage); + } + + mutex_unlock(&cgroup_mutex); WARN_ON(!RB_EMPTY_ROOT(&map->root)); WARN_ON(!list_empty(&map->list)); @@ -335,49 +362,63 @@ static int cgroup_storage_check_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type) { - struct btf_member *m; - u32 offset, size; - - /* Key is expected to be of struct bpf_cgroup_storage_key type, - * which is: - * struct bpf_cgroup_storage_key { - * __u64 cgroup_inode_id; - * __u32 attach_type; - * }; - */ + if (attach_type_isolated(map)) { + struct btf_member *m; + u32 offset, size; + + /* Key is expected to be of struct bpf_cgroup_storage_key type, + * which is: + * struct bpf_cgroup_storage_key { + * __u64 cgroup_inode_id; + * __u32 attach_type; + * }; + */ + + /* + * Key_type must be a structure with two fields. + */ + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(key_type->info) != 2) + return -EINVAL; + + /* + * The first field must be a 64 bit integer at 0 offset. 
+ */ + m = (struct btf_member *)(key_type + 1); + size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id); + if (!btf_member_is_reg_int(btf, key_type, m, 0, size)) + return -EINVAL; + + /* + * The second field must be a 32 bit integer at 64 bit offset. + */ + m++; + offset = offsetof(struct bpf_cgroup_storage_key, attach_type); + size = sizeof_field(struct bpf_cgroup_storage_key, attach_type); + if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) + return -EINVAL; + } else { + u32 int_data; - /* - * Key_type must be a structure with two fields. - */ - if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT || - BTF_INFO_VLEN(key_type->info) != 2) - return -EINVAL; + /* + * Key is expected to be u64, which stores the cgroup_inode_id + */ - /* - * The first field must be a 64 bit integer at 0 offset. - */ - m = (struct btf_member *)(key_type + 1); - size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id); - if (!btf_member_is_reg_int(btf, key_type, m, 0, size)) - return -EINVAL; + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + return -EINVAL; - /* - * The second field must be a 32 bit integer at 64 bit offset. - */ - m++; - offset = offsetof(struct bpf_cgroup_storage_key, attach_type); - size = sizeof_field(struct bpf_cgroup_storage_key, attach_type); - if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) - return -EINVAL; + int_data = *(u32 *)(key_type + 1); + if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data)) + return -EINVAL; + } return 0; } -static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key, +static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); - struct bpf_cgroup_storage_key *key = _key; struct bpf_cgroup_storage *storage; int cpu; @@ -426,38 +467,13 @@ const struct bpf_map_ops cgroup_storage_map_ops = { int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map) { enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map); - struct bpf_cgroup_storage_map *map = map_to_storage(_map); - int ret = -EBUSY; - - spin_lock_bh(&map->lock); - if (map->aux && map->aux != aux) - goto unlock; if (aux->cgroup_storage[stype] && aux->cgroup_storage[stype] != _map) - goto unlock; + return -EBUSY; - map->aux = aux; aux->cgroup_storage[stype] = _map; - ret = 0; -unlock: - spin_unlock_bh(&map->lock); - - return ret; -} - -void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *_map) -{ - enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map); - struct bpf_cgroup_storage_map *map = map_to_storage(_map); - - spin_lock_bh(&map->lock); - if (map->aux == aux) { - WARN_ON(aux->cgroup_storage[stype] != _map); - map->aux = NULL; - aux->cgroup_storage[stype] = NULL; - } - spin_unlock_bh(&map->lock); + return 0; } static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages) @@ -578,7 +594,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, spin_lock_bh(&map->lock); WARN_ON(cgroup_storage_insert(map, storage)); - list_add(&storage->list, &map->list); + list_add(&storage->list_map, &map->list); + list_add(&storage->list_cg, &cgroup->bpf.storages); spin_unlock_bh(&map->lock); } @@ -596,7 +613,8 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage) root = &map->root; rb_erase(&storage->node, root); - list_del(&storage->list); + list_del(&storage->list_map); + list_del(&storage->list_cg); spin_unlock_bh(&map->lock); } diff --git 
a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 8a7af11b411f..fbe1f557cb88 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -7,7 +7,7 @@ #include <linux/btf_ids.h> struct bpf_iter_seq_map_info { - u32 mid; + u32 map_id; }; static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos) @@ -15,27 +15,23 @@ static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos) struct bpf_iter_seq_map_info *info = seq->private; struct bpf_map *map; - map = bpf_map_get_curr_or_next(&info->mid); + map = bpf_map_get_curr_or_next(&info->map_id); if (!map) return NULL; - ++*pos; + if (*pos == 0) + ++*pos; return map; } static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_map_info *info = seq->private; - struct bpf_map *map; ++*pos; - ++info->mid; + ++info->map_id; bpf_map_put((struct bpf_map *)v); - map = bpf_map_get_curr_or_next(&info->mid); - if (!map) - return NULL; - - return map; + return bpf_map_get_curr_or_next(&info->map_id); } struct bpf_iter__bpf_map { @@ -85,23 +81,79 @@ static const struct seq_operations bpf_map_seq_ops = { BTF_ID_LIST(btf_bpf_map_id) BTF_ID(struct, bpf_map) -static struct bpf_iter_reg bpf_map_reg_info = { - .target = "bpf_map", +static const struct bpf_iter_seq_info bpf_map_seq_info = { .seq_ops = &bpf_map_seq_ops, .init_seq_private = NULL, .fini_seq_private = NULL, .seq_priv_size = sizeof(struct bpf_iter_seq_map_info), +}; + +static struct bpf_iter_reg bpf_map_reg_info = { + .target = "bpf_map", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map, map), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &bpf_map_seq_info, +}; + +static int bpf_iter_check_map(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux) +{ + u32 key_acc_size, value_acc_size, key_size, value_size; + struct bpf_map *map = aux->map; + bool is_percpu = false; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + is_percpu = true; + else if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) + return -EINVAL; + + key_acc_size = prog->aux->max_rdonly_access; + value_acc_size = prog->aux->max_rdwr_access; + key_size = map->key_size; + if (!is_percpu) + value_size = map->value_size; + else + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + + if (key_acc_size > key_size || value_acc_size > value_size) + return -EACCES; + + return 0; +} + +DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, + struct bpf_map *map, void *key, void *value) + +static const struct bpf_iter_reg bpf_map_elem_reg_info = { + .target = "bpf_map_elem", + .check_target = bpf_iter_check_map, + .req_linfo = BPF_ITER_LINK_MAP_FD, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_map_elem, key), + PTR_TO_RDONLY_BUF_OR_NULL }, + { offsetof(struct bpf_iter__bpf_map_elem, value), + PTR_TO_RDWR_BUF_OR_NULL }, + }, }; static int __init bpf_map_iter_init(void) { + int ret; + bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id; - return bpf_iter_reg_target(&bpf_map_reg_info); + ret = bpf_iter_reg_target(&bpf_map_reg_info); + if (ret) + return ret; + + return bpf_iter_reg_target(&bpf_map_elem_reg_info); } late_initcall(bpf_map_iter_init); diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 71405edd667c..542f275bf252 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -142,9 +142,16 @@ 
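On the BPF side, a program for the bpf_map_elem target registered above receives each element's key as a read-only buffer and its value as a read-write buffer; both may be NULL and must be checked before use. A sketch in the style of the kernel selftests (assumes a hash map with __u32 keys and __u64 values, and the selftests' bpf_iter.h for the context type and BPF_SEQ_PRINTF):

// SPDX-License-Identifier: GPL-2.0
#include "bpf_iter.h"            /* selftests header: iterator ctx types, BPF_SEQ_PRINTF */
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("iter/bpf_map_elem")
int dump_elem(struct bpf_iter__bpf_map_elem *ctx)
{
        struct seq_file *seq = ctx->meta->seq;
        __u32 *key = ctx->key;           /* PTR_TO_RDONLY_BUF_OR_NULL */
        __u64 *val = ctx->value;         /* PTR_TO_RDWR_BUF_OR_NULL */

        if (!key || !val)
                return 0;

        BPF_SEQ_PRINTF(seq, "key %u -> value %llu\n", *key, *val);
        return 0;
}

Because the value is exposed as a read-write buffer, the same program could also update *val in place while iterating.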
static void bpf_netns_link_release(struct bpf_link *link) bpf_prog_array_free(old_array); out_unlock: + net_link->net = NULL; mutex_unlock(&netns_bpf_mutex); } +static int bpf_netns_link_detach(struct bpf_link *link) +{ + bpf_netns_link_release(link); + return 0; +} + static void bpf_netns_link_dealloc(struct bpf_link *link) { struct bpf_netns_link *net_link = @@ -228,6 +235,7 @@ static void bpf_netns_link_show_fdinfo(const struct bpf_link *link, static const struct bpf_link_ops bpf_netns_link_ops = { .release = bpf_netns_link_release, .dealloc = bpf_netns_link_dealloc, + .detach = bpf_netns_link_detach, .update_prog = bpf_netns_link_update_prog, .fill_link_info = bpf_netns_link_fill_info, .show_fdinfo = bpf_netns_link_show_fdinfo, diff --git a/kernel/bpf/prog_iter.c b/kernel/bpf/prog_iter.c new file mode 100644 index 000000000000..53a73c841c13 --- /dev/null +++ b/kernel/bpf/prog_iter.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <linux/bpf.h> +#include <linux/fs.h> +#include <linux/filter.h> +#include <linux/kernel.h> +#include <linux/btf_ids.h> + +struct bpf_iter_seq_prog_info { + u32 prog_id; +}; + +static void *bpf_prog_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_prog_info *info = seq->private; + struct bpf_prog *prog; + + prog = bpf_prog_get_curr_or_next(&info->prog_id); + if (!prog) + return NULL; + + if (*pos == 0) + ++*pos; + return prog; +} + +static void *bpf_prog_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_prog_info *info = seq->private; + + ++*pos; + ++info->prog_id; + bpf_prog_put((struct bpf_prog *)v); + return bpf_prog_get_curr_or_next(&info->prog_id); +} + +struct bpf_iter__bpf_prog { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_prog *, prog); +}; + +DEFINE_BPF_ITER_FUNC(bpf_prog, struct bpf_iter_meta *meta, struct bpf_prog *prog) + +static int __bpf_prog_seq_show(struct seq_file *seq, void *v, bool in_stop) +{ + struct bpf_iter__bpf_prog ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + ctx.meta = &meta; + ctx.prog = v; + meta.seq = seq; + prog = bpf_iter_get_info(&meta, in_stop); + if (prog) + ret = bpf_iter_run_prog(prog, &ctx); + + return ret; +} + +static int bpf_prog_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_prog_seq_show(seq, v, false); +} + +static void bpf_prog_seq_stop(struct seq_file *seq, void *v) +{ + if (!v) + (void)__bpf_prog_seq_show(seq, v, true); + else + bpf_prog_put((struct bpf_prog *)v); +} + +static const struct seq_operations bpf_prog_seq_ops = { + .start = bpf_prog_seq_start, + .next = bpf_prog_seq_next, + .stop = bpf_prog_seq_stop, + .show = bpf_prog_seq_show, +}; + +BTF_ID_LIST(btf_bpf_prog_id) +BTF_ID(struct, bpf_prog) + +static const struct bpf_iter_seq_info bpf_prog_seq_info = { + .seq_ops = &bpf_prog_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_prog_info), +}; + +static struct bpf_iter_reg bpf_prog_reg_info = { + .target = "bpf_prog", + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_prog, prog), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &bpf_prog_seq_info, +}; + +static int __init bpf_prog_iter_init(void) +{ + bpf_prog_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_prog_id; + return bpf_iter_reg_target(&bpf_prog_reg_info); +} + +late_initcall(bpf_prog_iter_init); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 48d8e739975f..4fd830a62be2 100644 
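The new prog_iter.c target above follows the same pattern as the task and map iterators; a correspondingly small iterator program might look like this (again leaning on the selftests' bpf_iter.h, with vmlinux.h-style type info assumed for struct bpf_prog):

// SPDX-License-Identifier: GPL-2.0
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("iter/bpf_prog")
int dump_prog(struct bpf_iter__bpf_prog *ctx)
{
        struct bpf_prog *prog = ctx->prog;

        if (!prog)
                return 0;

        BPF_SEQ_PRINTF(ctx->meta->seq, "prog id %u\n", prog->aux->id);
        return 0;
}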
--- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <linux/jhash.h> #include <linux/filter.h> +#include <linux/kernel.h> #include <linux/stacktrace.h> #include <linux/perf_event.h> #include <linux/elf.h> @@ -387,11 +388,10 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) #endif } -BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, - u64, flags) +static long __bpf_get_stackid(struct bpf_map *map, + struct perf_callchain_entry *trace, u64 flags) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); - struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; u32 max_depth = map->value_size / stack_map_data_size(map); /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ @@ -399,21 +399,9 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len; bool user = flags & BPF_F_USER_STACK; - bool kernel = !user; u64 *ips; bool hash_matches; - if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | - BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) - return -EINVAL; - - trace = get_perf_callchain(regs, init_nr, kernel, user, - sysctl_perf_event_max_stack, false, false); - - if (unlikely(!trace)) - /* couldn't fetch the stack trace */ - return -EFAULT; - /* get_perf_callchain() guarantees that trace->nr >= init_nr * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth */ @@ -478,6 +466,30 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, return id; } +BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags) +{ + u32 max_depth = map->value_size / stack_map_data_size(map); + /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ + u32 init_nr = sysctl_perf_event_max_stack - max_depth; + bool user = flags & BPF_F_USER_STACK; + struct perf_callchain_entry *trace; + bool kernel = !user; + + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) + return -EINVAL; + + trace = get_perf_callchain(regs, init_nr, kernel, user, + sysctl_perf_event_max_stack, false, false); + + if (unlikely(!trace)) + /* couldn't fetch the stack trace */ + return -EFAULT; + + return __bpf_get_stackid(map, trace, flags); +} + const struct bpf_func_proto bpf_get_stackid_proto = { .func = bpf_get_stackid, .gpl_only = true, @@ -487,7 +499,77 @@ const struct bpf_func_proto bpf_get_stackid_proto = { .arg3_type = ARG_ANYTHING, }; +static __u64 count_kernel_ip(struct perf_callchain_entry *trace) +{ + __u64 nr_kernel = 0; + + while (nr_kernel < trace->nr) { + if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) + break; + nr_kernel++; + } + return nr_kernel; +} + +BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, + struct bpf_map *, map, u64, flags) +{ + struct perf_event *event = ctx->event; + struct perf_callchain_entry *trace; + bool kernel, user; + __u64 nr_kernel; + int ret; + + /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ + if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) + return bpf_get_stackid((unsigned long)(ctx->regs), + (unsigned long) map, flags, 0, 0); + + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) + return -EINVAL; + + user = flags & BPF_F_USER_STACK; + kernel = !user; + 
+ trace = ctx->data->callchain; + if (unlikely(!trace)) + return -EFAULT; + + nr_kernel = count_kernel_ip(trace); + + if (kernel) { + __u64 nr = trace->nr; + + trace->nr = nr_kernel; + ret = __bpf_get_stackid(map, trace, flags); + + /* restore nr */ + trace->nr = nr; + } else { /* user */ + u64 skip = flags & BPF_F_SKIP_FIELD_MASK; + + skip += nr_kernel; + if (skip > BPF_F_SKIP_FIELD_MASK) + return -EFAULT; + + flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; + ret = __bpf_get_stackid(map, trace, flags); + } + return ret; +} + +const struct bpf_func_proto bpf_get_stackid_proto_pe = { + .func = bpf_get_stackid_pe, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, + struct perf_callchain_entry *trace_in, void *buf, u32 size, u64 flags) { u32 init_nr, trace_nr, copy_len, elem_size, num_elem; @@ -520,7 +602,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, else init_nr = sysctl_perf_event_max_stack - num_elem; - if (kernel && task) + if (trace_in) + trace = trace_in; + else if (kernel && task) trace = get_callchain_entry_for_task(task, init_nr); else trace = get_perf_callchain(regs, init_nr, kernel, user, @@ -556,7 +640,7 @@ clear: BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, u64, flags) { - return __bpf_get_stack(regs, NULL, buf, size, flags); + return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); } const struct bpf_func_proto bpf_get_stack_proto = { @@ -574,7 +658,7 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, { struct pt_regs *regs = task_pt_regs(task); - return __bpf_get_stack(regs, task, buf, size, flags); + return __bpf_get_stack(regs, task, NULL, buf, size, flags); } BTF_ID_LIST(bpf_get_task_stack_btf_ids) @@ -591,6 +675,69 @@ const struct bpf_func_proto bpf_get_task_stack_proto = { .btf_id = bpf_get_task_stack_btf_ids, }; +BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, + void *, buf, u32, size, u64, flags) +{ + struct pt_regs *regs = (struct pt_regs *)(ctx->regs); + struct perf_event *event = ctx->event; + struct perf_callchain_entry *trace; + bool kernel, user; + int err = -EINVAL; + __u64 nr_kernel; + + if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) + return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); + + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | + BPF_F_USER_BUILD_ID))) + goto clear; + + user = flags & BPF_F_USER_STACK; + kernel = !user; + + err = -EFAULT; + trace = ctx->data->callchain; + if (unlikely(!trace)) + goto clear; + + nr_kernel = count_kernel_ip(trace); + + if (kernel) { + __u64 nr = trace->nr; + + trace->nr = nr_kernel; + err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); + + /* restore nr */ + trace->nr = nr; + } else { /* user */ + u64 skip = flags & BPF_F_SKIP_FIELD_MASK; + + skip += nr_kernel; + if (skip > BPF_F_SKIP_FIELD_MASK) + goto clear; + + flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; + err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); + } + return err; + +clear: + memset(buf, 0, size); + return err; + +} + +const struct bpf_func_proto bpf_get_stack_proto_pe = { + .func = bpf_get_stack_pe, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +}; + /* Called from eBPF 
program */ static void *stack_map_lookup_elem(struct bpf_map *map, void *key) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d07417d17712..2f343ce15747 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2824,6 +2824,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_TRACING; case BPF_SK_LOOKUP: return BPF_PROG_TYPE_SK_LOOKUP; + case BPF_XDP: + return BPF_PROG_TYPE_XDP; default: return BPF_PROG_TYPE_UNSPEC; } @@ -3044,6 +3046,25 @@ again: return map; } +struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) +{ + struct bpf_prog *prog; + + spin_lock_bh(&prog_idr_lock); +again: + prog = idr_get_next(&prog_idr, id); + if (prog) { + prog = bpf_prog_inc_not_zero(prog); + if (IS_ERR(prog)) { + (*id)++; + goto again; + } + } + spin_unlock_bh(&prog_idr_lock); + + return prog; +} + #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id struct bpf_prog *bpf_prog_by_id(u32 id) @@ -3902,6 +3923,11 @@ static int link_create(union bpf_attr *attr) case BPF_PROG_TYPE_SK_LOOKUP: ret = netns_bpf_link_create(attr, prog); break; +#ifdef CONFIG_NET + case BPF_PROG_TYPE_XDP: + ret = bpf_xdp_link_attach(attr, prog); + break; +#endif default: ret = -EINVAL; } @@ -3965,6 +3991,29 @@ out_put_link: return ret; } +#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd + +static int link_detach(union bpf_attr *attr) +{ + struct bpf_link *link; + int ret; + + if (CHECK_ATTR(BPF_LINK_DETACH)) + return -EINVAL; + + link = bpf_link_get_from_fd(attr->link_detach.link_fd); + if (IS_ERR(link)) + return PTR_ERR(link); + + if (link->ops->detach) + ret = link->ops->detach(link); + else + ret = -EOPNOTSUPP; + + bpf_link_put(link); + return ret; +} + static int bpf_link_inc_not_zero(struct bpf_link *link) { return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 
0 : -ENOENT; @@ -4214,6 +4263,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_ITER_CREATE: err = bpf_iter_create(&attr); break; + case BPF_LINK_DETACH: + err = link_detach(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 2feecf095609..232df29793e9 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -51,7 +51,8 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos) if (!task) return NULL; - ++*pos; + if (*pos == 0) + ++*pos; return task; } @@ -210,7 +211,8 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) return NULL; } - ++*pos; + if (*pos == 0) + ++*pos; info->task = task; info->files = files; @@ -291,7 +293,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) } } -static int init_seq_pidns(void *priv_data) +static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_iter_seq_task_common *common = priv_data; @@ -317,25 +319,32 @@ BTF_ID_LIST(btf_task_file_ids) BTF_ID(struct, task_struct) BTF_ID(struct, file) -static struct bpf_iter_reg task_reg_info = { - .target = "task", +static const struct bpf_iter_seq_info task_seq_info = { .seq_ops = &task_seq_ops, .init_seq_private = init_seq_pidns, .fini_seq_private = fini_seq_pidns, .seq_priv_size = sizeof(struct bpf_iter_seq_task_info), +}; + +static struct bpf_iter_reg task_reg_info = { + .target = "task", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__task, task), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &task_seq_info, }; -static struct bpf_iter_reg task_file_reg_info = { - .target = "task_file", +static const struct bpf_iter_seq_info task_file_seq_info = { .seq_ops = &task_file_seq_ops, .init_seq_private = init_seq_pidns, .fini_seq_private = fini_seq_pidns, .seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info), +}; + +static struct bpf_iter_reg task_file_reg_info = { + .target = "task_file", .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__task_file, task), @@ -343,6 +352,7 @@ static struct bpf_iter_reg task_file_reg_info = { { offsetof(struct bpf_iter__task_file, file), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &task_file_seq_info, }; static int __init task_iter_init(void) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9a6703bc3f36..b6ccfce3bf4c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -409,7 +409,9 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON_OR_NULL || type == PTR_TO_TCP_SOCK_OR_NULL || type == PTR_TO_BTF_ID_OR_NULL || - type == PTR_TO_MEM_OR_NULL; + type == PTR_TO_MEM_OR_NULL || + type == PTR_TO_RDONLY_BUF_OR_NULL || + type == PTR_TO_RDWR_BUF_OR_NULL; } static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) @@ -503,6 +505,10 @@ static const char * const reg_type_str[] = { [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", [PTR_TO_MEM] = "mem", [PTR_TO_MEM_OR_NULL] = "mem_or_null", + [PTR_TO_RDONLY_BUF] = "rdonly_buf", + [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", + [PTR_TO_RDWR_BUF] = "rdwr_buf", + [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", }; static char slot_type_char[] = { @@ -2173,6 +2179,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: case PTR_TO_BTF_ID_OR_NULL: + case PTR_TO_RDONLY_BUF: + case PTR_TO_RDONLY_BUF_OR_NULL: + case PTR_TO_RDWR_BUF: + case PTR_TO_RDWR_BUF_OR_NULL: return true; default: return false; @@ -3052,14 
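Driving the new BPF_LINK_DETACH command needs nothing beyond the link FD; a minimal sketch against the raw syscall (a libbpf wrapper may be preferable where available):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Sketch: force-detach a link from its target while keeping the FD alive. */
static int link_detach(int link_fd)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.link_detach.link_fd = link_fd;

        return syscall(__NR_bpf, BPF_LINK_DETACH, &attr, sizeof(attr));
}

After a successful detach the link FD remains valid but defunct; closing it (or removing its bpffs pin) still releases the link as before.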
+3062,15 @@ int check_ctx_reg(struct bpf_verifier_env *env, return 0; } -static int check_tp_buffer_access(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, - int regno, int off, int size) +static int __check_buffer_access(struct bpf_verifier_env *env, + const char *buf_info, + const struct bpf_reg_state *reg, + int regno, int off, int size) { if (off < 0) { verbose(env, - "R%d invalid tracepoint buffer access: off=%d, size=%d", - regno, off, size); + "R%d invalid %s buffer access: off=%d, size=%d\n", + regno, buf_info, off, size); return -EACCES; } if (!tnum_is_const(reg->var_off) || reg->var_off.value) { @@ -3067,16 +3078,49 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env, tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, - "R%d invalid variable buffer offset: off=%d, var_off=%s", + "R%d invalid variable buffer offset: off=%d, var_off=%s\n", regno, off, tn_buf); return -EACCES; } + + return 0; +} + +static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + int err; + + err = __check_buffer_access(env, "tracepoint", reg, regno, off, size); + if (err) + return err; + if (off + size > env->prog->aux->max_tp_access) env->prog->aux->max_tp_access = off + size; return 0; } +static int check_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size, + bool zero_size_allowed, + const char *buf_info, + u32 *max_access) +{ + int err; + + err = __check_buffer_access(env, buf_info, reg, regno, off, size); + if (err) + return err; + + if (off + size > *max_access) + *max_access = off + size; + + return 0; +} + /* BPF architecture zero extends alu32 ops into 64-bit registesr */ static void zext_32_to_64(struct bpf_reg_state *reg) { @@ -3427,6 +3471,23 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == CONST_PTR_TO_MAP) { err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno); + } else if (reg->type == PTR_TO_RDONLY_BUF) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str[reg->type]); + return -EACCES; + } + err = check_buffer_access(env, reg, regno, off, size, false, + "rdonly", + &env->prog->aux->max_rdonly_access); + if (!err && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_RDWR_BUF) { + err = check_buffer_access(env, reg, regno, off, size, false, + "rdwr", + &env->prog->aux->max_rdwr_access); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -3668,6 +3729,18 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); + case PTR_TO_RDONLY_BUF: + if (meta && meta->raw_mode) + return -EACCES; + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, + "rdonly", + &env->prog->aux->max_rdonly_access); + case PTR_TO_RDWR_BUF: + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, + "rdwr", + &env->prog->aux->max_rdwr_access); default: /* scalar_value|ptr_to_stack or invalid ptr */ return check_stack_boundary(env, regno, access_size, zero_size_allowed, meta); @@ -3933,6 +4006,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, else if 
(!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && type != PTR_TO_MEM && + type != PTR_TO_RDONLY_BUF && + type != PTR_TO_RDWR_BUF && type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; @@ -4887,6 +4962,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn env->prog->has_callchain_buf = true; } + if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) + env->prog->call_get_stack = true; + if (changes_data) clear_all_pkt_pointers(env); return 0; @@ -6806,6 +6884,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->type = PTR_TO_BTF_ID; } else if (reg->type == PTR_TO_MEM_OR_NULL) { reg->type = PTR_TO_MEM; + } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) { + reg->type = PTR_TO_RDONLY_BUF; + } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) { + reg->type = PTR_TO_RDWR_BUF; } if (is_null) { /* We don't need id and ref_obj_id from this point diff --git a/kernel/events/core.c b/kernel/events/core.c index 856d98c36f56..ddcfd2fb5cc5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9544,6 +9544,24 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd) if (IS_ERR(prog)) return PTR_ERR(prog); + if (event->attr.precise_ip && + prog->call_get_stack && + (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) || + event->attr.exclude_callchain_kernel || + event->attr.exclude_callchain_user)) { + /* + * On perf_event with precise_ip, calling bpf_get_stack() + * may trigger unwinder warnings and occasional crashes. + * bpf_get_[stack|stackid] works around this issue by using + * callchain attached to perf_sample_data. If the + * perf_event does not full (kernel and user) callchain + * attached to perf_sample_data, do not allow attaching BPF + * program that calls bpf_get_[stack|stackid]. 
+ */ + bpf_prog_put(prog); + return -EPROTO; + } + event->prog = prog; event->orig_overflow_handler = READ_ONCE(event->overflow_handler); WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3cc0dcb60ca2..cb91ef902cc4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1411,9 +1411,9 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto_tp; case BPF_FUNC_get_stackid: - return &bpf_get_stackid_proto_tp; + return &bpf_get_stackid_proto_pe; case BPF_FUNC_get_stack: - return &bpf_get_stack_proto_tp; + return &bpf_get_stack_proto_pe; case BPF_FUNC_perf_prog_read_value: return &bpf_perf_prog_read_value_proto; case BPF_FUNC_read_branch_records: diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b03c469cd01f..99eb8c6c0fbc 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -327,6 +327,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* priority is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority), + offsetof(struct __sk_buff, ifindex))) + return -EINVAL; + + /* ifindex is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex), offsetof(struct __sk_buff, cb))) return -EINVAL; @@ -381,6 +387,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) __skb->mark = skb->mark; __skb->priority = skb->priority; + __skb->ifindex = skb->dev->ifindex; __skb->tstamp = skb->tstamp; memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); __skb->wire_len = cb->pkt_len; @@ -391,6 +398,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool is_l2 = false, is_direct_pkt_access = false; + struct net *net = current->nsproxy->net_ns; + struct net_device *dev = net->loopback_dev; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct __sk_buff *ctx = NULL; @@ -432,7 +441,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, kfree(ctx); return -ENOMEM; } - sock_net_set(sk, current->nsproxy->net_ns); + sock_net_set(sk, net); sock_init_data(NULL, sk); skb = build_skb(data, 0); @@ -446,9 +455,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); - skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); + if (ctx && ctx->ifindex > 1) { + dev = dev_get_by_index(net, ctx->ifindex); + if (!dev) { + ret = -ENODEV; + goto out; + } + } + skb->protocol = eth_type_trans(skb, dev); skb_reset_network_header(skb); + switch (skb->protocol) { + case htons(ETH_P_IP): + sk->sk_family = AF_INET; + if (sizeof(struct iphdr) <= skb_headlen(skb)) { + sk->sk_rcv_saddr = ip_hdr(skb)->saddr; + sk->sk_daddr = ip_hdr(skb)->daddr; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + sk->sk_family = AF_INET6; + if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) { + sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr; + sk->sk_v6_daddr = ipv6_hdr(skb)->daddr; + } + break; +#endif + default: + break; + } + if (is_l2) __skb_push(skb, hh_len); if (is_direct_pkt_access) @@ -481,6 +518,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct __sk_buff)); out: + if (dev && dev != net->loopback_dev) + dev_put(dev); kfree_skb(skb); 
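The __sk_buff.ifindex plumbing above lets BPF_PROG_TEST_RUN steer the test packet at a device other than loopback. A hedged libbpf sketch (bpf_prog_test_run_xattr and its ctx_in support are assumed to be available; the packet buffer must still hold a parseable Ethernet frame):

#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Sketch: run an skb program against a chosen ifindex. */
static int test_run_on_ifindex(int prog_fd, void *pkt, __u32 pkt_len, int ifindex)
{
        struct __sk_buff ctx = {};
        struct bpf_prog_test_run_attr tattr = {};

        ctx.ifindex = ifindex;           /* > 1 picks a device other than loopback */

        tattr.prog_fd = prog_fd;
        tattr.data_in = pkt;
        tattr.data_size_in = pkt_len;
        tattr.ctx_in = &ctx;
        tattr.ctx_size_in = sizeof(ctx);

        return bpf_prog_test_run_xattr(&tattr);
}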
bpf_sk_storage_free(sk); kfree(sk); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 6f921c4ddc2c..d3377c90a291 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> +#include <linux/btf_ids.h> #include <net/bpf_sk_storage.h> #include <net/sock.h> #include <uapi/linux/sock_diag.h> @@ -943,6 +944,16 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; +const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = { + .func = bpf_sk_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */ + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_sk_storage_delete_proto = { .func = bpf_sk_storage_delete, .gpl_only = false, @@ -1217,3 +1228,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, return err; } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); + +struct bpf_iter_seq_sk_storage_map_info { + struct bpf_map *map; + unsigned int bucket_id; + unsigned skip_elems; +}; + +static struct bpf_sk_storage_elem * +bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, + struct bpf_sk_storage_elem *prev_selem) +{ + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + u32 skip_elems = info->skip_elems; + struct bpf_sk_storage_map *smap; + u32 bucket_id = info->bucket_id; + u32 i, count, n_buckets; + struct bucket *b; + + smap = (struct bpf_sk_storage_map *)info->map; + n_buckets = 1U << smap->bucket_log; + if (bucket_id >= n_buckets) + return NULL; + + /* try to find next selem in the same bucket */ + selem = prev_selem; + count = 0; + while (selem) { + selem = hlist_entry_safe(selem->map_node.next, + struct bpf_sk_storage_elem, map_node); + if (!selem) { + /* not found, unlock and go to the next bucket */ + b = &smap->buckets[bucket_id++]; + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + break; + } + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage) { + info->skip_elems = skip_elems + count; + return selem; + } + count++; + } + + for (i = bucket_id; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + raw_spin_lock_bh(&b->lock); + count = 0; + hlist_for_each_entry(selem, &b->list, map_node) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage && count >= skip_elems) { + info->bucket_id = i; + info->skip_elems = count; + return selem; + } + count++; + } + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + } + + info->bucket_id = i; + info->skip_elems = 0; + return NULL; +} + +static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_sk_storage_elem *selem; + + selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); + if (!selem) + return NULL; + + if (*pos == 0) + ++*pos; + return selem; +} + +static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + + ++*pos; + ++info->skip_elems; + return bpf_sk_storage_map_seq_find_next(seq->private, v); +} + +struct bpf_iter__bpf_sk_storage_map { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(struct sock *, sk); + __bpf_md_ptr(void *, value); +}; + +DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, + struct 
bpf_map *map, struct sock *sk, + void *value) + +static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_iter__bpf_sk_storage_map ctx = {}; + struct bpf_sk_storage *sk_storage; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, selem == NULL); + if (prog) { + ctx.meta = &meta; + ctx.map = info->map; + if (selem) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + ctx.sk = sk_storage->sk; + ctx.value = SDATA(selem)->data; + } + ret = bpf_iter_run_prog(prog, &ctx); + } + + return ret; +} + +static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_sk_storage_map_seq_show(seq, v); +} + +static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_sk_storage_map *smap; + struct bucket *b; + + if (!v) { + (void)__bpf_sk_storage_map_seq_show(seq, v); + } else { + smap = (struct bpf_sk_storage_map *)info->map; + b = &smap->buckets[info->bucket_id]; + raw_spin_unlock_bh(&b->lock); + } +} + +static int bpf_iter_init_sk_storage_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; + + seq_info->map = aux->map; + return 0; +} + +static int bpf_iter_check_map(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux) +{ + struct bpf_map *map = aux->map; + + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) + return -EINVAL; + + if (prog->aux->max_rdonly_access > map->value_size) + return -EACCES; + + return 0; +} + +static const struct seq_operations bpf_sk_storage_map_seq_ops = { + .start = bpf_sk_storage_map_seq_start, + .next = bpf_sk_storage_map_seq_next, + .stop = bpf_sk_storage_map_seq_stop, + .show = bpf_sk_storage_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_sk_storage_map_seq_ops, + .init_seq_private = bpf_iter_init_sk_storage_map, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), +}; + +static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { + .target = "bpf_sk_storage_map", + .check_target = bpf_iter_check_map, + .req_linfo = BPF_ITER_LINK_MAP_FD, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), + PTR_TO_BTF_ID_OR_NULL }, + { offsetof(struct bpf_iter__bpf_sk_storage_map, value), + PTR_TO_RDWR_BUF_OR_NULL }, + }, + .seq_info = &iter_seq_info, +}; + +static int __init bpf_sk_storage_map_iter_init(void) +{ + bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = + btf_sock_ids[BTF_SOCK_TYPE_SOCK]; + return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); +} +late_initcall(bpf_sk_storage_map_iter_init); diff --git a/net/core/dev.c b/net/core/dev.c index f7ef0f5c5569..7df6c9617321 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5467,10 +5467,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) } break; - case XDP_QUERY_PROG: - xdp->prog_id = old ? 
old->aux->id : 0; - break; - default: ret = -EINVAL; break; @@ -8740,189 +8736,464 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, } EXPORT_SYMBOL(dev_change_proto_down_reason); -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, - enum bpf_netdev_command cmd) +struct bpf_xdp_link { + struct bpf_link link; + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ + int flags; +}; + +static enum bpf_xdp_mode dev_xdp_mode(u32 flags) { - struct netdev_bpf xdp; + if (flags & XDP_FLAGS_HW_MODE) + return XDP_MODE_HW; + if (flags & XDP_FLAGS_DRV_MODE) + return XDP_MODE_DRV; + return XDP_MODE_SKB; +} - if (!bpf_op) - return 0; +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) +{ + switch (mode) { + case XDP_MODE_SKB: + return generic_xdp_install; + case XDP_MODE_DRV: + case XDP_MODE_HW: + return dev->netdev_ops->ndo_bpf; + default: + return NULL; + }; +} - memset(&xdp, 0, sizeof(xdp)); - xdp.command = cmd; +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + return dev->xdp_state[mode].link; +} + +static struct bpf_prog *dev_xdp_prog(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + struct bpf_xdp_link *link = dev_xdp_link(dev, mode); + + if (link) + return link->link.prog; + return dev->xdp_state[mode].prog; +} + +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) +{ + struct bpf_prog *prog = dev_xdp_prog(dev, mode); - /* Query must always succeed. */ - WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); + return prog ? prog->aux->id : 0; +} - return xdp.prog_id; +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_xdp_link *link) +{ + dev->xdp_state[mode].link = link; + dev->xdp_state[mode].prog = NULL; } -static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, - struct netlink_ext_ack *extack, u32 flags, - struct bpf_prog *prog) +static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_prog *prog) +{ + dev->xdp_state[mode].link = NULL; + dev->xdp_state[mode].prog = prog; +} + +static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, + bpf_op_t bpf_op, struct netlink_ext_ack *extack, + u32 flags, struct bpf_prog *prog) { - bool non_hw = !(flags & XDP_FLAGS_HW_MODE); - struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; int err; - if (non_hw) { - prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, - XDP_QUERY_PROG)); - if (IS_ERR(prev_prog)) - prev_prog = NULL; - } - memset(&xdp, 0, sizeof(xdp)); - if (flags & XDP_FLAGS_HW_MODE) - xdp.command = XDP_SETUP_PROG_HW; - else - xdp.command = XDP_SETUP_PROG; + xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; xdp.extack = extack; xdp.flags = flags; xdp.prog = prog; + /* Drivers assume refcnt is already incremented (i.e, prog pointer is + * "moved" into driver), so they don't increment it on their own, but + * they do decrement refcnt when program is detached or replaced. + * Given net_device also owns link/prog, we need to bump refcnt here + * to prevent drivers from underflowing it. 
+ */ + if (prog) + bpf_prog_inc(prog); err = bpf_op(dev, &xdp); - if (!err && non_hw) - bpf_prog_change_xdp(prev_prog, prog); + if (err) { + if (prog) + bpf_prog_put(prog); + return err; + } - if (prev_prog) - bpf_prog_put(prev_prog); + if (mode != XDP_MODE_HW) + bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); - return err; + return 0; } static void dev_xdp_uninstall(struct net_device *dev) { - struct netdev_bpf xdp; - bpf_op_t ndo_bpf; + struct bpf_xdp_link *link; + struct bpf_prog *prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; - /* Remove generic XDP */ - WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); + ASSERT_RTNL(); - /* Remove from the driver */ - ndo_bpf = dev->netdev_ops->ndo_bpf; - if (!ndo_bpf) - return; + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { + prog = dev_xdp_prog(dev, mode); + if (!prog) + continue; - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - WARN_ON(ndo_bpf(dev, &xdp)); - if (xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) + continue; - /* Remove HW offload */ - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG_HW; - if (!ndo_bpf(dev, &xdp) && xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + + /* auto-detach link from net device */ + link = dev_xdp_link(dev, mode); + if (link) + link->dev = NULL; + else + bpf_prog_put(prog); + + dev_xdp_set_link(dev, mode, NULL); + } } -/** - * dev_change_xdp_fd - set or clear a bpf program for a device rx path - * @dev: device - * @extack: netlink extended ack - * @fd: new program fd or negative value to clear - * @expected_fd: old program fd that userspace expects to replace or clear - * @flags: xdp-related flags - * - * Set or clear a bpf program for a device - */ -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags) +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, + struct bpf_xdp_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog, u32 flags) { - const struct net_device_ops *ops = dev->netdev_ops; - enum bpf_netdev_command query; - u32 prog_id, expected_id = 0; - bpf_op_t bpf_op, bpf_chk; - struct bpf_prog *prog; - bool offload; + struct bpf_prog *cur_prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; int err; ASSERT_RTNL(); - offload = flags & XDP_FLAGS_HW_MODE; - query = offload ? 
XDP_QUERY_PROG_HW : XDP_QUERY_PROG; + /* either link or prog attachment, never both */ + if (link && (new_prog || old_prog)) + return -EINVAL; + /* link supports only XDP mode flags */ + if (link && (flags & ~XDP_FLAGS_MODES)) { + NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); + return -EINVAL; + } + /* just one XDP mode bit should be set, zero defaults to SKB mode */ + if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); + return -EINVAL; + } + /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ + if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { + NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); + return -EINVAL; + } - bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { - NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); - return -EOPNOTSUPP; + mode = dev_xdp_mode(flags); + /* can't replace attached link */ + if (dev_xdp_link(dev, mode)) { + NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); + return -EBUSY; } - if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) - bpf_op = generic_xdp_install; - if (bpf_op == bpf_chk) - bpf_chk = generic_xdp_install; - - prog_id = __dev_xdp_query(dev, bpf_op, query); - if (flags & XDP_FLAGS_REPLACE) { - if (expected_fd >= 0) { - prog = bpf_prog_get_type_dev(expected_fd, - BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); - expected_id = prog->aux->id; - bpf_prog_put(prog); - } - if (prog_id != expected_id) { - NL_SET_ERR_MSG(extack, "Active program does not match expected"); - return -EEXIST; - } + cur_prog = dev_xdp_prog(dev, mode); + /* can't replace attached prog with link */ + if (link && cur_prog) { + NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); + return -EBUSY; + } + if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { + NL_SET_ERR_MSG(extack, "Active program does not match expected"); + return -EEXIST; + } + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); + return -EBUSY; } - if (fd >= 0) { - if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { - NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); - return -EEXIST; - } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { - NL_SET_ERR_MSG(extack, "XDP program already attached"); - return -EBUSY; - } + /* put effective new program into new_prog */ + if (link) + new_prog = link->link.prog; - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); + if (new_prog) { + bool offload = mode == XDP_MODE_HW; + enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB + ? 
XDP_MODE_DRV : XDP_MODE_SKB; - if (!offload && bpf_prog_is_dev_bound(prog->aux)) { - NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); - bpf_prog_put(prog); + if (!offload && dev_xdp_prog(dev, other_mode)) { + NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); + return -EEXIST; + } + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); return -EINVAL; } - - if (prog->expected_attach_type == BPF_XDP_DEVMAP) { + if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); - bpf_prog_put(prog); return -EINVAL; } - - if (prog->expected_attach_type == BPF_XDP_CPUMAP) { - NL_SET_ERR_MSG(extack, - "BPF_XDP_CPUMAP programs can not be attached to a device"); - bpf_prog_put(prog); + if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { + NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); return -EINVAL; } + } - /* prog->aux->id may be 0 for orphaned device-bound progs */ - if (prog->aux->id && prog->aux->id == prog_id) { - bpf_prog_put(prog); - return 0; + /* don't call drivers if the effective program didn't change */ + if (new_prog != cur_prog) { + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) { + NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); + return -EOPNOTSUPP; + } + + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); + if (err) + return err; + } + + if (link) + dev_xdp_set_link(dev, mode, link); + else + dev_xdp_set_prog(dev, mode, new_prog); + if (cur_prog) + bpf_prog_put(cur_prog); + + return 0; +} + +static int dev_xdp_attach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); +} + +static int dev_xdp_detach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + + ASSERT_RTNL(); + + mode = dev_xdp_mode(link->flags); + if (dev_xdp_link(dev, mode) != link) + return -EINVAL; + + bpf_op = dev_xdp_bpf_op(dev, mode); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + dev_xdp_set_link(dev, mode, NULL); + return 0; +} + +static void bpf_xdp_link_release(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + rtnl_lock(); + + /* if racing with net_device's tear down, xdp_link->dev might be + * already NULL, in which case link was already auto-detached + */ + if (xdp_link->dev) { + WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + xdp_link->dev = NULL; + } + + rtnl_unlock(); +} + +static int bpf_xdp_link_detach(struct bpf_link *link) +{ + bpf_xdp_link_release(link); + return 0; +} + +static void bpf_xdp_link_dealloc(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + kfree(xdp_link); +} + +static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + seq_printf(seq, "ifindex:\t%u\n", ifindex); +} + +static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct 
bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + info->xdp.ifindex = ifindex; + return 0; +} + +static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + int err = 0; + + rtnl_lock(); + + /* link might have been auto-released already, so fail */ + if (!xdp_link->dev) { + err = -ENOLINK; + goto out_unlock; + } + + if (old_prog && link->prog != old_prog) { + err = -EPERM; + goto out_unlock; + } + old_prog = link->prog; + if (old_prog == new_prog) { + /* no-op, don't disturb drivers */ + bpf_prog_put(new_prog); + goto out_unlock; + } + + mode = dev_xdp_mode(xdp_link->flags); + bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); + err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, + xdp_link->flags, new_prog); + if (err) + goto out_unlock; + + old_prog = xchg(&link->prog, new_prog); + bpf_prog_put(old_prog); + +out_unlock: + rtnl_unlock(); + return err; +} + +static const struct bpf_link_ops bpf_xdp_link_lops = { + .release = bpf_xdp_link_release, + .dealloc = bpf_xdp_link_dealloc, + .detach = bpf_xdp_link_detach, + .show_fdinfo = bpf_xdp_link_show_fdinfo, + .fill_link_info = bpf_xdp_link_fill_link_info, + .update_prog = bpf_xdp_link_update, +}; + +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_link_primer link_primer; + struct bpf_xdp_link *link; + struct net_device *dev; + int err, fd; + + dev = dev_get_by_index(net, attr->link_create.target_ifindex); + if (!dev) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out_put_dev; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); + link->dev = dev; + link->flags = attr->link_create.flags; + + err = bpf_link_prime(&link->link, &link_primer); + if (err) { + kfree(link); + goto out_put_dev; + } + + rtnl_lock(); + err = dev_xdp_attach_link(dev, NULL, link); + rtnl_unlock(); + + if (err) { + bpf_link_cleanup(&link_primer); + goto out_put_dev; + } + + fd = bpf_link_settle(&link_primer); + /* link itself doesn't hold dev's refcnt to not complicate shutdown */ + dev_put(dev); + return fd; + +out_put_dev: + dev_put(dev); + return err; +} + +/** + * dev_change_xdp_fd - set or clear a bpf program for a device rx path + * @dev: device + * @extack: netlink extended ack + * @fd: new program fd or negative value to clear + * @expected_fd: old program fd that userspace expects to replace or clear + * @flags: xdp-related flags + * + * Set or clear a bpf program for a device + */ +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, int expected_fd, u32 flags) +{ + enum bpf_xdp_mode mode = dev_xdp_mode(flags); + struct bpf_prog *new_prog = NULL, *old_prog = NULL; + int err; + + ASSERT_RTNL(); + + if (fd >= 0) { + new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(new_prog)) + return PTR_ERR(new_prog); + } + + if (expected_fd >= 0) { + old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(old_prog)) { + err = PTR_ERR(old_prog); + old_prog = NULL; + goto err_out; } - } else { - if (!prog_id) - return 0; - prog = NULL; } - err = dev_xdp_install(dev, bpf_op, 
extack, flags, prog); - if (err < 0 && prog) - bpf_prog_put(prog); + err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); +err_out: + if (err && new_prog) + bpf_prog_put(new_prog); + if (old_prog) + bpf_prog_put(old_prog); return err; } diff --git a/net/core/filter.c b/net/core/filter.c index 29e3455122f7..7124f0fe6974 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6187,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func) } const struct bpf_func_proto bpf_event_output_data_proto __weak; +const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak; static const struct bpf_func_proto * sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -6219,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; #endif + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_cg_sock_proto; default: return bpf_base_func_proto(func_id); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a54c3e0f2ee1..68e0682450c6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1426,13 +1426,12 @@ static u32 rtnl_xdp_prog_skb(struct net_device *dev) static u32 rtnl_xdp_prog_drv(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG); + return dev_xdp_prog_id(dev, XDP_MODE_DRV); } static u32 rtnl_xdp_prog_hw(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, - XDP_QUERY_PROG_HW); + return dev_xdp_prog_id(dev, XDP_MODE_HW); } static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, diff --git a/net/core/xdp.c b/net/core/xdp.c index 3c45f99e26d5..48aba933a5a8 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -400,15 +400,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -int xdp_attachment_query(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - bpf->prog_id = info->prog ? info->prog->aux->id : 0; - bpf->prog_flags = info->prog ? 
info->flags : 0; - return 0; -} -EXPORT_SYMBOL_GPL(xdp_attachment_query); - bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f8913923a6c0..5084333b5ab6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2921,7 +2921,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, struct sock_common *sk_common, uid_t uid) -static int bpf_iter_init_tcp(void *priv_data) +static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { struct tcp_iter_state *st = priv_data; struct tcp_seq_afinfo *afinfo; @@ -2933,7 +2933,7 @@ static int bpf_iter_init_tcp(void *priv_data) afinfo->family = AF_UNSPEC; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; @@ -2947,17 +2947,21 @@ static void bpf_iter_fini_tcp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg tcp_reg_info = { - .target = "tcp", +static const struct bpf_iter_seq_info tcp_seq_info = { .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, .fini_seq_private = bpf_iter_fini_tcp, .seq_priv_size = sizeof(struct tcp_iter_state), +}; + +static struct bpf_iter_reg tcp_reg_info = { + .target = "tcp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__tcp, sk_common), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &tcp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0fb5e4ea133f..e88efba07551 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -473,7 +473,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, return sk; reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); - if (reuse_sk && !reuseport_has_conns(sk, false)) + if (reuse_sk) sk = reuse_sk; return sk; } @@ -3181,7 +3181,7 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) -static int bpf_iter_init_udp(void *priv_data) +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) { struct udp_iter_state *st = priv_data; struct udp_seq_afinfo *afinfo; @@ -3194,7 +3194,7 @@ static int bpf_iter_init_udp(void *priv_data) afinfo->family = AF_UNSPEC; afinfo->udp_table = &udp_table; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; @@ -3208,17 +3208,21 @@ static void bpf_iter_fini_udp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg udp_reg_info = { - .target = "udp", +static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, .seq_priv_size = sizeof(struct udp_iter_state), +}; + +static struct bpf_iter_reg udp_reg_info = { + .target = "udp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &udp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 48d499d763fa..5e7e25e2523a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6427,17 +6427,21 @@ DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *r BTF_ID_LIST(btf_fib6_info_id) 
BTF_ID(struct, fib6_info) -static struct bpf_iter_reg ipv6_route_reg_info = { - .target = "ipv6_route", +static const struct bpf_iter_seq_info ipv6_route_seq_info = { .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct ipv6_route_iter), +}; + +static struct bpf_iter_reg ipv6_route_reg_info = { + .target = "ipv6_route", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__ipv6_route, rt), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &ipv6_route_seq_info, }; static int __init bpf_iter_register(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5530c9dcb61c..29d9691359b9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -155,9 +155,6 @@ static struct sock *lookup_reuseport(struct net *net, struct sock *sk, hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - /* Fall back to scoring if group has connections */ - if (reuseport_has_conns(sk, false)) - return NULL; } return reuse_sk; } @@ -211,7 +208,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, return sk; reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); - if (reuse_sk && !reuseport_has_conns(sk, false)) + if (reuse_sk) sk = reuse_sk; return sk; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d8921b833744..b5f30d7d30d0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2807,17 +2807,21 @@ static const struct rhashtable_params netlink_rhashtable_params = { BTF_ID_LIST(btf_netlink_sock_id) BTF_ID(struct, netlink_sock) -static struct bpf_iter_reg netlink_reg_info = { - .target = "netlink", +static const struct bpf_iter_seq_info netlink_seq_info = { .seq_ops = &netlink_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct nl_seq_iter), +}; + +static struct bpf_iter_reg netlink_reg_info = { + .target = "netlink", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__netlink, sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &netlink_seq_info, }; static int __init bpf_iter_register(void) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 2e94a7e94671..c3231620d210 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -840,7 +840,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case XDP_STATISTICS: { - struct xdp_statistics stats; + struct xdp_statistics stats = {}; bool extra_stats = true; size_t stats_size; diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 8dce698eab79..070ffacb42b5 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -17,14 +17,15 @@ SYNOPSIS ITER COMMANDS =================== -| **bpftool** **iter pin** *OBJ* *PATH* +| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*] | **bpftool** **iter help** | | *OBJ* := /a/file/of/bpf_iter_target.o +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } DESCRIPTION =========== - **bpftool iter pin** *OBJ* *PATH* + **bpftool iter pin** *OBJ* *PATH* [**map** *MAP*] A bpf iterator combines a kernel iterating of particular kernel data (e.g., tasks, bpf_maps, etc.) and a bpf program called for each kernel data object @@ -37,6 +38,12 @@ DESCRIPTION character ('.'), which is reserved for future extensions of *bpffs*. 
+ Map element bpf iterator requires an additional parameter + *MAP* so bpf program can iterate over map elements for + that map. User can have a bpf program in kernel to run + with each map element, do checking, filtering, aggregation, + etc. without copying data to user space. + User can then *cat PATH* to see the bpf iterator output. **bpftool iter help** @@ -64,6 +71,13 @@ EXAMPLES Create a file-based bpf iterator from bpf_iter_netlink.o and pin it to /sys/fs/bpf/my_netlink +**# bpftool iter pin bpf_iter_hashmap.o /sys/fs/bpf/my_hashmap map id 20** + +:: + + Create a file-based bpf iterator from bpf_iter_hashmap.o and map with + id 20, and pin it to /sys/fs/bpf/my_hashmap + SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 38b0949a185b..4a52e7a93339 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -21,6 +21,7 @@ LINK COMMANDS | **bpftool** **link { show | list }** [*LINK*] | **bpftool** **link pin** *LINK* *FILE* +| **bpftool** **link detach *LINK* | **bpftool** **link help** | | *LINK* := { **id** *LINK_ID* | **pinned** *FILE* } @@ -49,6 +50,13 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. + **bpftool link detach** *LINK* + Force-detach link *LINK*. BPF link and its underlying BPF + program will stay valid, but they will be detached from the + respective BPF hook and BPF link will transition into + a defunct state until last open file descriptor for that + link is closed. + **bpftool link help** Print short help message. diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 51bd520ed437..8462690a039b 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -59,6 +59,7 @@ endif INSTALL ?= install RM ?= rm -f CLANG ?= clang +LLVM_STRIP ?= llvm-strip FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \ @@ -147,7 +148,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_PATH) \ -I$(srctree)/tools/lib \ - -g -O2 -target bpf -c $< -o $@ + -g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 7b137264ea3a..f53ed2f1a4aa 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -615,7 +615,23 @@ _bpftool() iter) case $command in pin) - _filedir + case $prev in + $command) + _filedir + ;; + id) + _bpftool_get_map_ids + ;; + name) + _bpftool_get_map_names + ;; + pinned) + _filedir + ;; + *) + _bpftool_one_of_list $MAP_TYPE + ;; + esac return 0 ;; *) @@ -1106,7 +1122,7 @@ _bpftool() ;; link) case $command in - show|list|pin) + show|list|pin|detach) case $prev in id) _bpftool_get_link_ids @@ -1123,7 +1139,7 @@ _bpftool() COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) ) return 0 ;; - pin) + pin|detach) if [[ $prev == "$command" ]]; then COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) ) else diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index fc9bc7a23db6..8ab142ff5eac 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -422,54 +422,6 @@ done: return err; } -static struct btf *btf__parse_raw(const char *file) -{ - struct btf *btf; - 
struct stat st; - __u8 *buf; - FILE *f; - - if (stat(file, &st)) - return NULL; - - f = fopen(file, "rb"); - if (!f) - return NULL; - - buf = malloc(st.st_size); - if (!buf) { - btf = ERR_PTR(-ENOMEM); - goto exit_close; - } - - if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) { - btf = ERR_PTR(-EINVAL); - goto exit_free; - } - - btf = btf__new(buf, st.st_size); - -exit_free: - free(buf); -exit_close: - fclose(f); - return btf; -} - -static bool is_btf_raw(const char *file) -{ - __u16 magic = 0; - int fd, nb_read; - - fd = open(file, O_RDONLY); - if (fd < 0) - return false; - - nb_read = read(fd, &magic, sizeof(magic)); - close(fd); - return nb_read == sizeof(magic) && magic == BTF_MAGIC; -} - static int do_dump(int argc, char **argv) { struct btf *btf = NULL; @@ -547,11 +499,7 @@ static int do_dump(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(src, "file")) { - if (is_btf_raw(*argv)) - btf = btf__parse_raw(*argv); - else - btf = btf__parse_elf(*argv, NULL); - + btf = btf__parse(*argv, NULL); if (IS_ERR(btf)) { err = -PTR_ERR(btf); btf = NULL; @@ -596,7 +544,7 @@ static int do_dump(int argc, char **argv) goto done; } if (!btf) { - err = ENOENT; + err = -ENOENT; p_err("can't find btf with ID (%u)", btf_id); goto done; } diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 1cd75807673e..a43a6f10b564 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -504,6 +504,10 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, supported_types[prog_type] |= res; + if (!prog_type_name[prog_type]) { + p_info("program type name not found (type %d)", prog_type); + return; + } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; if (strlen(prog_type_name[prog_type]) > maxlen) { p_info("program type name too long"); @@ -533,6 +537,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, * check required for unprivileged users */ + if (!map_type_name[map_type]) { + p_info("map type name not found (type %d)", map_type); + return; + } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; if (strlen(map_type_name[map_type]) > maxlen) { p_info("map type name too long"); diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 33240fcc6319..c9dba7543dba 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -2,6 +2,7 @@ // Copyright (C) 2020 Facebook #define _GNU_SOURCE +#include <unistd.h> #include <linux/err.h> #include <bpf/libbpf.h> @@ -9,11 +10,12 @@ static int do_pin(int argc, char **argv) { + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; struct bpf_link *link; - int err; + int err = -1, map_fd = -1; if (!REQ_ARGS(2)) usage(); @@ -21,10 +23,26 @@ static int do_pin(int argc, char **argv) objfile = GET_ARG(); path = GET_ARG(); + /* optional arguments */ + if (argc) { + if (is_prefix(*argv, "map")) { + NEXT_ARG(); + + if (!REQ_ARGS(2)) { + p_err("incorrect map spec"); + return -1; + } + + map_fd = map_parse_fd(&argc, &argv); + if (map_fd < 0) + return -1; + } + } + obj = bpf_object__open(objfile); if (IS_ERR(obj)) { p_err("can't open objfile %s", objfile); - return -1; + goto close_map_fd; } err = bpf_object__load(obj); @@ -39,7 +57,10 @@ static int do_pin(int argc, char **argv) goto close_obj; } - link = bpf_program__attach_iter(prog, NULL); + if (map_fd >= 0) + iter_opts.map_fd = map_fd; + + link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); 
p_err("attach_iter failed for program %s", @@ -62,14 +83,18 @@ close_link: bpf_link__destroy(link); close_obj: bpf_object__close(obj); +close_map_fd: + if (map_fd >= 0) + close(map_fd); return err; } static int do_help(int argc, char **argv) { fprintf(stderr, - "Usage: %1$s %2$s pin OBJ PATH\n" + "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n" " %1$s %2$s help\n" + " " HELP_SPEC_MAP "\n" "", bin_name, "iter"); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 326b8fdf0243..1b793759170e 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -22,6 +22,8 @@ static const char * const link_type_name[] = { static int link_parse_fd(int *argc, char ***argv) { + int fd; + if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -35,7 +37,10 @@ static int link_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - return bpf_link_get_fd_by_id(id); + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) + p_err("failed to get link with ID %d: %s", id, strerror(errno)); + return fd; } else if (is_prefix(**argv, "pinned")) { char *path; @@ -316,6 +321,34 @@ static int do_pin(int argc, char **argv) return err; } +static int do_detach(int argc, char **argv) +{ + int err, fd; + + if (argc != 2) { + p_err("link specifier is invalid or missing\n"); + return 1; + } + + fd = link_parse_fd(&argc, &argv); + if (fd < 0) + return 1; + + err = bpf_link_detach(fd); + if (err) + err = -errno; + close(fd); + if (err) { + p_err("failed link detach: %s", strerror(-err)); + return 1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -326,6 +359,7 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [LINK]\n" " %1$s %2$s pin LINK FILE\n" + " %1$s %2$s detach LINK\n" " %1$s %2$s help\n" "\n" " " HELP_SPEC_LINK "\n" @@ -341,6 +375,7 @@ static const struct cmd cmds[] = { { "list", do_show }, { "help", do_help }, { "pin", do_pin }, + { "detach", do_detach }, { 0 } }; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 3e6ecc6332e2..158995d853b0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -59,6 +59,7 @@ const char * const prog_type_name[] = { [BPF_PROG_TYPE_TRACING] = "tracing", [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", [BPF_PROG_TYPE_EXT] = "ext", + [BPF_PROG_TYPE_LSM] = "lsm", [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", }; diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore new file mode 100644 index 000000000000..a026df7dc280 --- /dev/null +++ b/tools/bpf/resolve_btfids/.gitignore @@ -0,0 +1,4 @@ +/FEATURE-DUMP.libbpf +/bpf_helper_defs.h +/fixdep +/resolve_btfids diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 6956b6350cad..52d883325a23 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -403,62 +403,6 @@ static int symbols_collect(struct object *obj) return 0; } -static struct btf *btf__parse_raw(const char *file) -{ - struct btf *btf; - struct stat st; - __u8 *buf; - FILE *f; - - if (stat(file, &st)) - return NULL; - - f = fopen(file, "rb"); - if (!f) - return NULL; - - buf = malloc(st.st_size); - if (!buf) { - btf = ERR_PTR(-ENOMEM); - goto exit_close; - } - - if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) { - btf = ERR_PTR(-EINVAL); - goto exit_free; - } - - btf = btf__new(buf, st.st_size); - -exit_free: - free(buf); -exit_close: - fclose(f); - return btf; -} - -static bool is_btf_raw(const char *file) -{ - 
__u16 magic = 0; - int fd, nb_read; - - fd = open(file, O_RDONLY); - if (fd < 0) - return false; - - nb_read = read(fd, &magic, sizeof(magic)); - close(fd); - return nb_read == sizeof(magic) && magic == BTF_MAGIC; -} - -static struct btf *btf_open(const char *path) -{ - if (is_btf_raw(path)) - return btf__parse_raw(path); - else - return btf__parse_elf(path, NULL); -} - static int symbols_resolve(struct object *obj) { int nr_typedefs = obj->nr_typedefs; @@ -469,7 +413,7 @@ static int symbols_resolve(struct object *obj) struct btf *btf; __u32 nr; - btf = btf_open(obj->btf ?: obj->path); + btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s", diff --git a/tools/build/Build.include b/tools/build/Build.include index 9ec01f4454f9..585486e40995 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -74,7 +74,8 @@ dep-cmd = $(if $(wildcard $(fixdep)), # dependencies in the cmd file if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ @set -e; \ - $(echo-cmd) $(cmd_$(1)) && $(dep-cmd)) + $(echo-cmd) $(cmd_$(1)); \ + $(dep-cmd)) # if_changed - execute command if any prerequisite is newer than # target, or command line has changed diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 54d0c886e3ba..b134e679e9db 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -117,6 +117,7 @@ enum bpf_cmd { BPF_LINK_GET_NEXT_ID, BPF_ENABLE_STATS, BPF_ITER_CREATE, + BPF_LINK_DETACH, }; enum bpf_map_type { @@ -230,6 +231,7 @@ enum bpf_attach_type { BPF_CGROUP_INET_SOCK_RELEASE, BPF_XDP_CPUMAP, BPF_SK_LOOKUP, + BPF_XDP, __MAX_BPF_ATTACH_TYPE }; @@ -242,10 +244,18 @@ enum bpf_link_type { BPF_LINK_TYPE_CGROUP = 3, BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, + BPF_LINK_TYPE_XDP = 6, MAX_BPF_LINK_TYPE, }; +enum bpf_iter_link_info { + BPF_ITER_LINK_UNSPEC = 0, + BPF_ITER_LINK_MAP_FD = 1, + + MAX_BPF_ITER_LINK_INFO, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -607,7 +617,10 @@ union bpf_attr { struct { /* struct used by BPF_LINK_CREATE command */ __u32 prog_fd; /* eBPF program to attach */ - __u32 target_fd; /* object to attach to */ + union { + __u32 target_fd; /* object to attach to */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ } link_create; @@ -622,6 +635,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { + __u32 link_fd; + } link_detach; + struct { /* struct used by BPF_ENABLE_STATS command */ __u32 type; } enable_stats; @@ -3229,7 +3246,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. 
* If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification @@ -4057,6 +4074,9 @@ struct bpf_link_info { __u32 netns_ino; __u32 attach_type; } netns; + struct { + __u32 ifindex; + } xdp; }; } __attribute__((aligned(8))); diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a7329b671c41..eab14c97c15d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -598,10 +598,21 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.prog_fd = prog_fd; attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; + attr.link_create.flags = OPTS_GET(opts, flags, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } +int bpf_link_detach(int link_fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.link_detach.link_fd = link_fd; + + return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); +} + int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts) { diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index dbef24ebcfcb..28855fd5b5f4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -170,13 +170,16 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; }; -#define bpf_link_create_opts__last_field sz +#define bpf_link_create_opts__last_field flags LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, const struct bpf_link_create_opts *opts); +LIBBPF_API int bpf_link_detach(int link_fd); + struct bpf_link_update_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; /* extra flags */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 58eceb884df3..eebf020cbe3e 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -215,7 +215,7 @@ struct pt_regs; #define PT_REGS_PARM5(x) ((x)->regs[8]) #define PT_REGS_RET(x) ((x)->regs[31]) #define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[1]) +#define PT_REGS_RC(x) ((x)->regs[2]) #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) @@ -226,7 +226,7 @@ struct pt_regs; #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1]) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index c9e760e120dc..856b09a04563 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -386,7 +386,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, void btf__free(struct btf *btf) { - if (!btf) + if (IS_ERR_OR_NULL(btf)) return; if (btf->fd >= 0) @@ -562,6 +562,83 @@ done: return btf; } +struct btf *btf__parse_raw(const char *path) +{ + void *data = NULL; + struct btf *btf; + FILE *f = NULL; + __u16 magic; + int err = 0; + long sz; + + f = fopen(path, "rb"); + if (!f) { + err = -errno; + goto err_out; + } + + /* check BTF magic */ + if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) { + err = -EIO; + goto err_out; + } + if (magic != BTF_MAGIC) { + /* definitely not a raw BTF */ + err = -EPROTO; + goto err_out; + } + + /* get file size */ + if (fseek(f, 0, 
SEEK_END)) { + err = -errno; + goto err_out; + } + sz = ftell(f); + if (sz < 0) { + err = -errno; + goto err_out; + } + /* rewind to the start */ + if (fseek(f, 0, SEEK_SET)) { + err = -errno; + goto err_out; + } + + /* pre-alloc memory and read all of BTF data */ + data = malloc(sz); + if (!data) { + err = -ENOMEM; + goto err_out; + } + if (fread(data, 1, sz, f) < sz) { + err = -EIO; + goto err_out; + } + + /* finally parse BTF data */ + btf = btf__new(data, sz); + +err_out: + free(data); + if (f) + fclose(f); + return err ? ERR_PTR(err) : btf; +} + +struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) +{ + struct btf *btf; + + if (btf_ext) + *btf_ext = NULL; + + btf = btf__parse_raw(path); + if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO) + return btf; + + return btf__parse_elf(path, btf_ext); +} + static int compare_vsi_off(const void *_a, const void *_b) { const struct btf_var_secinfo *a = _a; @@ -1025,7 +1102,7 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) void btf_ext__free(struct btf_ext *btf_ext) { - if (!btf_ext) + if (IS_ERR_OR_NULL(btf_ext)) return; free(btf_ext->data); free(btf_ext); @@ -2951,41 +3028,6 @@ static int btf_dedup_remap_types(struct btf_dedup *d) return 0; } -static struct btf *btf_load_raw(const char *path) -{ - struct btf *btf; - size_t read_cnt; - struct stat st; - void *data; - FILE *f; - - if (stat(path, &st)) - return ERR_PTR(-errno); - - data = malloc(st.st_size); - if (!data) - return ERR_PTR(-ENOMEM); - - f = fopen(path, "rb"); - if (!f) { - btf = ERR_PTR(-errno); - goto cleanup; - } - - read_cnt = fread(data, 1, st.st_size, f); - fclose(f); - if (read_cnt < st.st_size) { - btf = ERR_PTR(-EBADF); - goto cleanup; - } - - btf = btf__new(data, read_cnt); - -cleanup: - free(data); - return btf; -} - /* * Probe few well-known locations for vmlinux kernel image and try to load BTF * data out of it to use for target BTF. 
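For context, a minimal sketch of how a caller might use the btf__parse() entry point added in this hunk: it tries the file as raw BTF first and falls back to ELF parsing, so callers no longer need their own magic-number probing. The helper name, path handling and error message below are illustrative assumptions; the error-handling pattern follows the resolve_btfids caller shown earlier in this diff::

    #include <stdio.h>
    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    /* Parse BTF from @path, accepting either raw BTF or an ELF object with
     * a .BTF section; btf__parse() probes the magic and picks the parser.
     */
    static struct btf *load_btf(const char *path)
    {
            struct btf *btf = btf__parse(path, NULL);

            if (libbpf_get_error(btf)) {
                    fprintf(stderr, "failed to parse BTF from %s\n", path);
                    return NULL;
            }
            return btf;     /* caller releases with btf__free() */
    }
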
@@ -3021,7 +3063,7 @@ struct btf *libbpf_find_kernel_btf(void) continue; if (locations[i].raw_btf) - btf = btf_load_raw(path); + btf = btf__parse_raw(path); else btf = btf__parse_elf(path, NULL); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 491c7b41ffdc..f4a1a1d2b9a3 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -64,8 +64,9 @@ struct btf_ext_header { LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(const void *data, __u32 size); -LIBBPF_API struct btf *btf__parse_elf(const char *path, - struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse_raw(const char *path); LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e1c344504cae..cf711168d34a 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -183,7 +183,7 @@ void btf_dump__free(struct btf_dump *d) { int i, cnt; - if (!d) + if (IS_ERR_OR_NULL(d)) return; free(d->type_states); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 846164c79df1..7be04e45d29c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6504,7 +6504,7 @@ void bpf_object__close(struct bpf_object *obj) { size_t i; - if (!obj) + if (IS_ERR_OR_NULL(obj)) return; if (obj->clear_priv) @@ -6915,7 +6915,8 @@ static const struct bpf_sec_def section_defs[] = { BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_XDP_CPUMAP), - BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP, + BPF_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), @@ -7689,7 +7690,7 @@ int bpf_link__destroy(struct bpf_link *link) { int err = 0; - if (!link) + if (IS_ERR_OR_NULL(link)) return 0; if (!link->disconnected && link->detach) @@ -7747,6 +7748,11 @@ struct bpf_link *bpf_link__open(const char *path) return link; } +int bpf_link__detach(struct bpf_link *link) +{ + return bpf_link_detach(link->fd) ? 
-errno : 0; +} + int bpf_link__pin(struct bpf_link *link, const char *path) { int err; @@ -7833,6 +7839,9 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, pr_warn("program '%s': failed to attach to pfd %d: %s\n", bpf_program__title(prog, false), pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + if (err == -EPROTO) + pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", + bpf_program__title(prog, false), pfd); return ERR_PTR(err); } if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { @@ -8278,17 +8287,30 @@ bpf_program__attach_netns(struct bpf_program *prog, int netns_fd) return bpf_program__attach_fd(prog, netns_fd, "netns"); } +struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex) +{ + /* target_fd/target_ifindex use the same field in LINK_CREATE */ + return bpf_program__attach_fd(prog, ifindex, "xdp"); +} + struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; + __u32 target_fd = 0; if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); + if (OPTS_HAS(opts, map_fd)) { + target_fd = opts->map_fd; + link_create_opts.flags = BPF_ITER_LINK_MAP_FD; + } + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("program '%s': can't attach before loaded\n", @@ -8301,7 +8323,8 @@ bpf_program__attach_iter(struct bpf_program *prog, return ERR_PTR(-ENOMEM); link->detach = &bpf_link__detach_fd; - link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL); + link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, + &link_create_opts); if (link_fd < 0) { link_fd = -errno; free(link); @@ -8484,7 +8507,7 @@ void perf_buffer__free(struct perf_buffer *pb) { int i; - if (!pb) + if (IS_ERR_OR_NULL(pb)) return; if (pb->cpu_bufs) { for (i = 0; i < pb->cpu_cnt; i++) { @@ -9361,8 +9384,7 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->prog_cnt; i++) { struct bpf_link **link = s->progs[i].link; - if (!IS_ERR_OR_NULL(*link)) - bpf_link__destroy(*link); + bpf_link__destroy(*link); *link = NULL; } } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c2272132e929..3ed1399bfbbc 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -229,6 +229,7 @@ LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog); LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); +LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * @@ -257,6 +258,8 @@ LIBBPF_API struct bpf_link * bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd); LIBBPF_API struct bpf_link * bpf_program__attach_netns(struct bpf_program *prog, int netns_fd); +LIBBPF_API struct bpf_link * +bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); struct bpf_map; @@ -264,8 +267,9 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 map_fd; }; -#define bpf_iter_attach_opts__last_field sz +#define bpf_iter_attach_opts__last_field map_fd LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git 
a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 6f0856abe299..0c4722bfdd0a 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -273,6 +273,8 @@ LIBBPF_0.0.9 { LIBBPF_0.1.0 { global: + bpf_link__detach; + bpf_link_detach; bpf_map__ifindex; bpf_map__key_size; bpf_map__map_flags; @@ -286,9 +288,12 @@ LIBBPF_0.1.0 { bpf_map__set_value_size; bpf_map__type; bpf_map__value_size; + bpf_program__attach_xdp; bpf_program__autoload; bpf_program__is_sk_lookup; bpf_program__set_autoload; bpf_program__set_sk_lookup; + btf__parse; + btf__parse_raw; btf__set_fd; } LIBBPF_0.0.9; diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 0fb910df5387..033051717ba5 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -290,3 +290,26 @@ free_mem: free(fhp); return ret; } + +int cgroup_setup_and_join(const char *path) { + int cg_fd; + + if (setup_cgroup_environment()) { + fprintf(stderr, "Failed to setup cgroup environment\n"); + return -EINVAL; + } + + cg_fd = create_and_get_cgroup(path); + if (cg_fd < 0) { + fprintf(stderr, "Failed to create test cgroup\n"); + cleanup_cgroup_environment(); + return cg_fd; + } + + if (join_cgroup(path)) { + fprintf(stderr, "Failed to join cgroup\n"); + cleanup_cgroup_environment(); + return -EINVAL; + } + return cg_fd; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index d64bb8957090..5fe3d88e4f0d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -9,6 +9,7 @@ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) +int cgroup_setup_and_join(const char *path); int create_and_get_cgroup(const char *path); int join_cgroup(const char *path); int setup_cgroup_environment(void); diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index e8da7b39158d..b8d6aef99db4 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -58,20 +58,10 @@ int main(int argc, char **argv) int exit_code = 1; char buf[256]; - err = setup_cgroup_environment(); - if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err, - errno)) + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) return 1; - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n", - cgroup_fd, errno)) - goto cleanup_cgroup_env; - - err = join_cgroup(TEST_CGROUP); - if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno)) - goto cleanup_cgroup_env; - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) goto cleanup_cgroup_env; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index fed42755416d..4ffefdc1130f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -15,6 +15,13 @@ #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" #include "bpf_iter_test_kern4.skel.h" +#include "bpf_iter_bpf_hash_map.skel.h" +#include "bpf_iter_bpf_percpu_hash_map.skel.h" +#include "bpf_iter_bpf_array_map.skel.h" +#include "bpf_iter_bpf_percpu_array_map.skel.h" +#include 
"bpf_iter_bpf_sk_storage_map.skel.h" +#include "bpf_iter_test_kern5.skel.h" +#include "bpf_iter_test_kern6.skel.h" static int duration; @@ -455,6 +462,440 @@ out: bpf_iter_test_kern4__destroy(skel); } +static void test_bpf_hash_map(void) +{ + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_hash_map *skel; + int err, i, len, map_fd, iter_fd; + __u64 val, expected_val = 0; + struct bpf_link *link; + struct key_t { + int a; + int b; + int c; + } key; + char buf[64]; + + skel = bpf_iter_bpf_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_hash_map__open", + "skeleton open failed\n")) + return; + + skel->bss->in_test_mode = true; + + err = bpf_iter_bpf_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_hash_map__load", + "skeleton load failed\n")) + goto out; + + /* iterator with hashmap2 and hashmap3 should fail */ + opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", + "attach_iter for hashmap2 unexpected succeeded\n")) + goto out; + + opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", + "attach_iter for hashmap3 unexpected succeeded\n")) + goto out; + + /* hashmap1 should be good, update map values here */ + map_fd = bpf_map__fd(skel->maps.hashmap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) { + key.a = i + 1; + key.b = i + 2; + key.c = i + 3; + val = i + 4; + expected_key_a += key.a; + expected_key_b += key.b; + expected_key_c += key.c; + expected_val += val; + + err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum_a != expected_key_a, + "key_sum_a", "got %u expected %u\n", + skel->bss->key_sum_a, expected_key_a)) + goto close_iter; + if (CHECK(skel->bss->key_sum_b != expected_key_b, + "key_sum_b", "got %u expected %u\n", + skel->bss->key_sum_b, expected_key_b)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %llu expected %llu\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_hash_map__destroy(skel); +} + +static void test_bpf_percpu_hash_map(void) +{ + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_percpu_hash_map *skel; + int err, i, j, len, map_fd, iter_fd; + __u32 expected_val = 0; + struct bpf_link *link; + struct key_t { + int a; + int b; + int c; + } key; + char buf[64]; + void *val; + + val = malloc(8 * bpf_num_possible_cpus()); + + skel = bpf_iter_bpf_percpu_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = 
bpf_num_possible_cpus(); + + err = bpf_iter_bpf_percpu_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", + "skeleton load failed\n")) + goto out; + + /* update map values here */ + map_fd = bpf_map__fd(skel->maps.hashmap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) { + key.a = i + 1; + key.b = i + 2; + key.c = i + 3; + expected_key_a += key.a; + expected_key_b += key.b; + expected_key_c += key.c; + + for (j = 0; j < bpf_num_possible_cpus(); j++) { + *(__u32 *)(val + j * 8) = i + j; + expected_val += i + j; + } + + err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum_a != expected_key_a, + "key_sum_a", "got %u expected %u\n", + skel->bss->key_sum_a, expected_key_a)) + goto close_iter; + if (CHECK(skel->bss->key_sum_b != expected_key_b, + "key_sum_b", "got %u expected %u\n", + skel->bss->key_sum_b, expected_key_b)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_percpu_hash_map__destroy(skel); +} + +static void test_bpf_array_map(void) +{ + __u64 val, expected_val = 0, res_first_val, first_val = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + __u32 expected_key = 0, res_first_key; + struct bpf_iter_bpf_array_map *skel; + int err, i, map_fd, iter_fd; + struct bpf_link *link; + char buf[64] = {}; + int len, start; + + skel = bpf_iter_bpf_array_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.arraymap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + val = i + 4; + expected_key += i; + expected_val += val; + + if (i == 0) + first_val = val; + + err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + start = 0; + while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) + start += len; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + res_first_key = *(__u32 *)buf; + res_first_val = *(__u64 *)(buf + sizeof(__u32)); + if (CHECK(res_first_key != 0 || res_first_val != first_val, + "bpf_seq_write", + "seq_write failure: first key %u vs expected 0, " + " first value %llu vs expected %llu\n", + res_first_key, res_first_val, first_val)) + goto close_iter; + + if (CHECK(skel->bss->key_sum != expected_key, + "key_sum", "got %u 
expected %u\n", + skel->bss->key_sum, expected_key)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %llu expected %llu\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + err = bpf_map_lookup_elem(map_fd, &i, &val); + if (CHECK(err, "map_lookup", "map_lookup failed\n")) + goto out; + if (CHECK(i != val, "invalid_val", + "got value %llu expected %u\n", val, i)) + goto out; + } + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_array_map__destroy(skel); +} + +static void test_bpf_percpu_array_map(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_percpu_array_map *skel; + __u32 expected_key = 0, expected_val = 0; + int err, i, j, map_fd, iter_fd; + struct bpf_link *link; + char buf[64]; + void *val; + int len; + + val = malloc(8 * bpf_num_possible_cpus()); + + skel = bpf_iter_bpf_percpu_array_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); + + err = bpf_iter_bpf_percpu_array_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", + "skeleton load failed\n")) + goto out; + + /* update map values here */ + map_fd = bpf_map__fd(skel->maps.arraymap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + expected_key += i; + + for (j = 0; j < bpf_num_possible_cpus(); j++) { + *(__u32 *)(val + j * 8) = i + j; + expected_val += i + j; + } + + err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum != expected_key, + "key_sum", "got %u expected %u\n", + skel->bss->key_sum, expected_key)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_percpu_array_map__destroy(skel); +} + +static void test_bpf_sk_storage_map(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + int err, i, len, map_fd, iter_fd, num_sockets; + struct bpf_iter_bpf_sk_storage_map *skel; + int sock_fd[3] = {-1, -1, -1}; + __u32 val, expected_val = 0; + struct bpf_link *link; + char buf[64]; + + skel = bpf_iter_bpf_sk_storage_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + num_sockets = ARRAY_SIZE(sock_fd); + for (i = 0; i < num_sockets; i++) { + sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno)) + goto out; + + val = i + 1; + expected_val += val; + + err = bpf_map_update_elem(map_fd, &sock_fd[i], &val, + BPF_NOEXIST); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + 
opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->ipv6_sk_count != num_sockets, + "ipv6_sk_count", "got %u expected %u\n", + skel->bss->ipv6_sk_count, num_sockets)) + goto close_iter; + + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + for (i = 0; i < num_sockets; i++) { + if (sock_fd[i] >= 0) + close(sock_fd[i]); + } + bpf_iter_bpf_sk_storage_map__destroy(skel); +} + +static void test_rdonly_buf_out_of_bound(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_test_kern5 *skel; + struct bpf_link *link; + + skel = bpf_iter_test_kern5__open_and_load(); + if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load", + "skeleton open_and_load failed\n")) + return; + + opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) + bpf_link__destroy(link); + + bpf_iter_test_kern5__destroy(skel); +} + +static void test_buf_neg_offset(void) +{ + struct bpf_iter_test_kern6 *skel; + + skel = bpf_iter_test_kern6__open_and_load(); + if (CHECK(skel, "bpf_iter_test_kern6__open_and_load", + "skeleton open_and_load unexpected success\n")) + bpf_iter_test_kern6__destroy(skel); +} + void test_bpf_iter(void) { if (test__start_subtest("btf_id_or_null")) @@ -491,4 +932,18 @@ void test_bpf_iter(void) test_overflow(true, false); if (test__start_subtest("prog-ret-1")) test_overflow(false, true); + if (test__start_subtest("bpf_hash_map")) + test_bpf_hash_map(); + if (test__start_subtest("bpf_percpu_hash_map")) + test_bpf_percpu_hash_map(); + if (test__start_subtest("bpf_array_map")) + test_bpf_array_map(); + if (test__start_subtest("bpf_percpu_array_map")) + test_bpf_percpu_array_map(); + if (test__start_subtest("bpf_sk_storage_map")) + test_bpf_sk_storage_map(); + if (test__start_subtest("rdonly-buf-out-of-bound")) + test_rdonly_buf_out_of_bound(); + if (test__start_subtest("buf-neg-offset")) + test_buf_neg_offset(); } diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c new file mode 100644 index 000000000000..643dfa35419c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. 
+ */ + +#include <test_progs.h> +#include <cgroup_helpers.h> +#include <network_helpers.h> + +#include "progs/cg_storage_multi.h" + +#include "cg_storage_multi_egress_only.skel.h" +#include "cg_storage_multi_isolated.skel.h" +#include "cg_storage_multi_shared.skel.h" + +#define PARENT_CGROUP "/cgroup_storage" +#define CHILD_CGROUP "/cgroup_storage/child" + +static int duration; + +static bool assert_storage(struct bpf_map *map, const void *key, + struct cgroup_value *expected) +{ + struct cgroup_value value; + int map_fd; + + map_fd = bpf_map__fd(map); + + if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0, + "map-lookup", "errno %d", errno)) + return true; + if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)), + "assert-storage", "storages differ")) + return true; + + return false; +} + +static bool assert_storage_noexist(struct bpf_map *map, const void *key) +{ + struct cgroup_value value; + int map_fd; + + map_fd = bpf_map__fd(map); + + if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0, + "map-lookup", "succeeded, expected ENOENT")) + return true; + if (CHECK(errno != ENOENT, + "map-lookup", "errno %d, expected ENOENT", errno)) + return true; + + return false; +} + +static bool connect_send(const char *cgroup_path) +{ + bool res = true; + int server_fd = -1, client_fd = -1; + + if (join_cgroup(cgroup_path)) + goto out_clean; + + server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0); + if (server_fd < 0) + goto out_clean; + + client_fd = connect_to_fd(server_fd, 0); + if (client_fd < 0) + goto out_clean; + + if (send(client_fd, "message", strlen("message"), 0) < 0) + goto out_clean; + + res = false; + +out_clean: + close(client_fd); + close(server_fd); + return res; +} + +static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) +{ + struct cg_storage_multi_egress_only *obj; + struct cgroup_value expected_cgroup_value; + struct bpf_cgroup_storage_key key; + struct bpf_link *parent_link = NULL, *child_link = NULL; + bool err; + + key.attach_type = BPF_CGROUP_INET_EGRESS; + + obj = cg_storage_multi_egress_only__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + /* Attach to parent cgroup, trigger packet from child. + * Assert that there is only one run and in that run the storage is + * parent cgroup's storage. + * Also assert that child cgroup's storage does not exist + */ + parent_link = bpf_program__attach_cgroup(obj->progs.egress, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_link), "parent-cg-attach", + "err %ld", PTR_ERR(parent_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "first-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 1, + "first-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + + /* Attach to parent and child cgroup, trigger packet from child. + * Assert that there are two additional runs, one run with parent + * cgroup's storage and one with child cgroup's storage. 
+ */ + child_link = bpf_program__attach_cgroup(obj->progs.egress, + child_cgroup_fd); + if (CHECK(IS_ERR(child_link), "child-cg-attach", + "err %ld", PTR_ERR(child_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "second-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 3, + "second-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + +close_bpf_object: + if (!IS_ERR(parent_link)) + bpf_link__destroy(parent_link); + if (!IS_ERR(child_link)) + bpf_link__destroy(child_link); + + cg_storage_multi_egress_only__destroy(obj); +} + +static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) +{ + struct cg_storage_multi_isolated *obj; + struct cgroup_value expected_cgroup_value; + struct bpf_cgroup_storage_key key; + struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL; + struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL; + struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL; + bool err; + + obj = cg_storage_multi_isolated__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + /* Attach to parent cgroup, trigger packet from child. + * Assert that there are three runs, two with parent cgroup egress and + * one with parent cgroup ingress, stored in separate parent storages. 
+ * Also assert that child cgroup's storages do not exist + */ + parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", + "err %ld", PTR_ERR(parent_egress1_link))) + goto close_bpf_object; + parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", + "err %ld", PTR_ERR(parent_egress2_link))) + goto close_bpf_object; + parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", + "err %ld", PTR_ERR(parent_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "first-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 3, + "first-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + + /* Attach to parent and child cgroup, trigger packet from child. + * Assert that there are six additional runs, parent cgroup egresses and + * ingress, child cgroup egresses and ingress. + * Assert that egress and ingress storages are separate. 
+ */ + child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", + "err %ld", PTR_ERR(child_egress1_link))) + goto close_bpf_object; + child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", + "err %ld", PTR_ERR(child_egress2_link))) + goto close_bpf_object; + child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + child_cgroup_fd); + if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", + "err %ld", PTR_ERR(child_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "second-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 9, + "second-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP); + key.attach_type = BPF_CGROUP_INET_EGRESS; + expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key.attach_type = BPF_CGROUP_INET_INGRESS; + expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + +close_bpf_object: + if (!IS_ERR(parent_egress1_link)) + bpf_link__destroy(parent_egress1_link); + if (!IS_ERR(parent_egress2_link)) + bpf_link__destroy(parent_egress2_link); + if (!IS_ERR(parent_ingress_link)) + bpf_link__destroy(parent_ingress_link); + if (!IS_ERR(child_egress1_link)) + bpf_link__destroy(child_egress1_link); + if (!IS_ERR(child_egress2_link)) + bpf_link__destroy(child_egress2_link); + if (!IS_ERR(child_ingress_link)) + bpf_link__destroy(child_ingress_link); + + cg_storage_multi_isolated__destroy(obj); +} + +static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) +{ + struct cg_storage_multi_shared *obj; + struct cgroup_value expected_cgroup_value; + __u64 key; + struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL; + struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL; + struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL; + bool err; + + obj = cg_storage_multi_shared__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + /* Attach to parent cgroup, trigger packet from child. + * Assert that there are three runs, two with parent cgroup egress and + * one with parent cgroup ingress. 
+ * Also assert that child cgroup's storage does not exist + */ + parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", + "err %ld", PTR_ERR(parent_egress1_link))) + goto close_bpf_object; + parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", + "err %ld", PTR_ERR(parent_egress2_link))) + goto close_bpf_object; + parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + parent_cgroup_fd); + if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", + "err %ld", PTR_ERR(parent_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "first-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 3, + "first-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { + .egress_pkts = 2, + .ingress_pkts = 1, + }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key = get_cgroup_id(CHILD_CGROUP); + if (assert_storage_noexist(obj->maps.cgroup_storage, &key)) + goto close_bpf_object; + + /* Attach to parent and child cgroup, trigger packet from child. + * Assert that there are six additional runs, parent cgroup egresses and + * ingress, child cgroup egresses and ingress. + */ + child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", + "err %ld", PTR_ERR(child_egress1_link))) + goto close_bpf_object; + child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, + child_cgroup_fd); + if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", + "err %ld", PTR_ERR(child_egress2_link))) + goto close_bpf_object; + child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, + child_cgroup_fd); + if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", + "err %ld", PTR_ERR(child_ingress_link))) + goto close_bpf_object; + err = connect_send(CHILD_CGROUP); + if (CHECK(err, "second-connect-send", "errno %d", errno)) + goto close_bpf_object; + if (CHECK(obj->bss->invocations != 9, + "second-invoke", "invocations=%d", obj->bss->invocations)) + goto close_bpf_object; + key = get_cgroup_id(PARENT_CGROUP); + expected_cgroup_value = (struct cgroup_value) { + .egress_pkts = 4, + .ingress_pkts = 2, + }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + key = get_cgroup_id(CHILD_CGROUP); + expected_cgroup_value = (struct cgroup_value) { + .egress_pkts = 2, + .ingress_pkts = 1, + }; + if (assert_storage(obj->maps.cgroup_storage, + &key, &expected_cgroup_value)) + goto close_bpf_object; + +close_bpf_object: + if (!IS_ERR(parent_egress1_link)) + bpf_link__destroy(parent_egress1_link); + if (!IS_ERR(parent_egress2_link)) + bpf_link__destroy(parent_egress2_link); + if (!IS_ERR(parent_ingress_link)) + bpf_link__destroy(parent_ingress_link); + if (!IS_ERR(child_egress1_link)) + bpf_link__destroy(child_egress1_link); + if (!IS_ERR(child_egress2_link)) + bpf_link__destroy(child_egress2_link); + if (!IS_ERR(child_ingress_link)) + bpf_link__destroy(child_ingress_link); + + cg_storage_multi_shared__destroy(obj); +} + +void test_cg_storage_multi(void) +{ + int parent_cgroup_fd = -1, 
child_cgroup_fd = -1; + + parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP); + if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno)) + goto close_cgroup_fd; + child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP); + if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno)) + goto close_cgroup_fd; + + if (test__start_subtest("egress_only")) + test_egress_only(parent_cgroup_fd, child_cgroup_fd); + + if (test__start_subtest("isolated")) + test_isolated(parent_cgroup_fd, child_cgroup_fd); + + if (test__start_subtest("shared")) + test_shared(parent_cgroup_fd, child_cgroup_fd); + +close_cgroup_fd: + close(child_cgroup_fd); + close(parent_cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 6e04f8d1d15b..4d9b514b3fd9 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -2,6 +2,7 @@ #include <test_progs.h> #include "cgroup_helpers.h" +#include "testing_helpers.h" #include "test_cgroup_link.skel.h" static __u32 duration = 0; @@ -37,7 +38,8 @@ void test_cgroup_link(void) int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs); DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts); struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link; - __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags; + __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id; + struct bpf_link_info info; int i = 0, err, prog_fd; bool detach_legacy = false; @@ -219,6 +221,22 @@ void test_cgroup_link(void) /* BPF programs should still get called */ ping_and_check(0, cg_nr); + prog_id = link_info_prog_id(links[0], &info); + CHECK(prog_id == 0, "link_info", "failed\n"); + CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id); + + err = bpf_link__detach(links[0]); + if (CHECK(err, "link_detach", "failed %d\n", err)) + goto cleanup; + + /* cgroup_id should be zero in link_info */ + prog_id = link_info_prog_id(links[0], &info); + CHECK(prog_id == 0, "link_info", "failed\n"); + CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id); + + /* First BPF program shouldn't be called anymore */ + ping_and_check(0, cg_nr - 1); + /* leave cgroup and remove them, don't detach programs */ cleanup_cgroup_environment(); diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c index 78e30d3a23d5..6acb0e94d4d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_retro.c +++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c @@ -6,7 +6,7 @@ void test_core_retro(void) { - int err, zero = 0, res, duration = 0; + int err, zero = 0, res, duration = 0, my_pid = getpid(); struct test_core_retro *skel; /* load program */ @@ -14,6 +14,10 @@ void test_core_retro(void) if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) goto out_close; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0); + if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno)) + goto out_close; + /* attach probe */ err = test_core_retro__attach(skel); if (CHECK(err, "attach_kprobe", "err %d\n", err)) @@ -26,7 +30,7 @@ void test_core_retro(void) if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno)) goto out_close; - CHECK(res != getpid(), "pid_check", "got %d != exp %d\n", res, getpid()); + CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid); 
out_close: test_core_retro__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c new file mode 100644 index 000000000000..d884b2ed5bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include <test_progs.h> +#include "test_stacktrace_build_id.skel.h" + +void test_get_stackid_cannot_attach(void) +{ + struct perf_event_attr attr = { + /* .type = PERF_TYPE_SOFTWARE, */ + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .precise_ip = 1, + .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK, + .branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES | + PERF_SAMPLE_BRANCH_CALL_STACK, + .sample_period = 5000, + .size = sizeof(struct perf_event_attr), + }; + struct test_stacktrace_build_id *skel; + __u32 duration = 0; + int pmu_fd, err; + + skel = test_stacktrace_build_id__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + return; + + /* override program type */ + bpf_program__set_perf_event(skel->progs.oncpu); + + err = test_stacktrace_build_id__load(skel); + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) + goto cleanup; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n", + __func__); + test__skip(); + goto cleanup; + } + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto cleanup; + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain", + "should have failed\n"); + close(pmu_fd); + + /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */ + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto cleanup; + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain", + "err: %ld\n", PTR_ERR(skel->links.oncpu)); + close(pmu_fd); + + /* add exclude_callchain_kernel, attach should fail */ + attr.exclude_callchain_kernel = 1; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto cleanup; + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel", + "should have failed\n"); + close(pmu_fd); + +cleanup: + test_stacktrace_build_id__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c new file mode 100644 index 000000000000..72c3690844fb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#define _GNU_SOURCE +#include <pthread.h> +#include <sched.h> +#include <test_progs.h> 
+#include "perf_event_stackmap.skel.h" + +#ifndef noinline +#define noinline __attribute__((noinline)) +#endif + +noinline int func_1(void) +{ + static int val = 1; + + val += 1; + + usleep(100); + return val; +} + +noinline int func_2(void) +{ + return func_1(); +} + +noinline int func_3(void) +{ + return func_2(); +} + +noinline int func_4(void) +{ + return func_3(); +} + +noinline int func_5(void) +{ + return func_4(); +} + +noinline int func_6(void) +{ + int i, val = 1; + + for (i = 0; i < 100; i++) + val += func_5(); + + return val; +} + +void test_perf_event_stackmap(void) +{ + struct perf_event_attr attr = { + /* .type = PERF_TYPE_SOFTWARE, */ + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .precise_ip = 2, + .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK | + PERF_SAMPLE_CALLCHAIN, + .branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES | + PERF_SAMPLE_BRANCH_CALL_STACK, + .sample_period = 5000, + .size = sizeof(struct perf_event_attr), + }; + struct perf_event_stackmap *skel; + __u32 duration = 0; + cpu_set_t cpu_set; + int pmu_fd, err; + + skel = perf_event_stackmap__open(); + + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + return; + + err = perf_event_stackmap__load(skel); + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) + goto cleanup; + + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno)) + goto cleanup; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (pmu_fd < 0) { + printf("%s:SKIP:cpu doesn't support the event\n", __func__); + test__skip(); + goto cleanup; + } + + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", + "err %ld\n", PTR_ERR(skel->links.oncpu))) { + close(pmu_fd); + goto cleanup; + } + + /* create kernel and user stack traces for testing */ + func_6(); + + CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n"); + CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n"); + CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n"); + CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n"); + +cleanup: + perf_event_stackmap__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 713167449c98..8b571890c57e 100644 --- a/tools/testing/selftests/bpf/prog_tests/section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -35,7 +35,7 @@ static struct sec_name_test tests[] = { {-EINVAL, 0}, }, {"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} }, - {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} }, + {"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} }, {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} }, {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} }, {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} }, diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index f1784ae4565a..c571584c00f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -34,6 +34,7 @@ #include "bpf_util.h" #include "cgroup_helpers.h" #include 
"network_helpers.h" +#include "testing_helpers.h" #include "test_sk_lookup.skel.h" /* External (address, port) pairs the client sends packets to. */ @@ -74,6 +75,7 @@ struct test { struct inet_addr connect_to; struct inet_addr listen_at; enum server accept_on; + bool reuseport_has_conns; /* Add a connected socket to reuseport group */ }; static __u32 duration; /* for CHECK macro */ @@ -468,34 +470,10 @@ static int update_lookup_map(struct bpf_map *map, int index, int sock_fd) return 0; } -static __u32 link_info_prog_id(struct bpf_link *link) -{ - struct bpf_link_info info = {}; - __u32 info_len = sizeof(info); - int link_fd, err; - - link_fd = bpf_link__fd(link); - if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) { - errno = -link_fd; - log_err("bpf_link__fd failed"); - return 0; - } - - err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len); - if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) { - log_err("bpf_obj_get_info_by_fd"); - return 0; - } - if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd", - "unexpected info len %u\n", info_len)) - return 0; - - return info.prog_id; -} - static void query_lookup_prog(struct test_sk_lookup *skel) { struct bpf_link *link[3] = {}; + struct bpf_link_info info; __u32 attach_flags = 0; __u32 prog_ids[3] = {}; __u32 prog_cnt = 3; @@ -533,18 +511,36 @@ static void query_lookup_prog(struct test_sk_lookup *skel) if (CHECK(prog_cnt != 3, "bpf_prog_query", "wrong program count on query: %u", prog_cnt)) goto detach; - prog_id = link_info_prog_id(link[0]); + prog_id = link_info_prog_id(link[0], &info); CHECK(prog_ids[0] != prog_id, "bpf_prog_query", "invalid program #0 id on query: %u != %u\n", prog_ids[0], prog_id); - prog_id = link_info_prog_id(link[1]); + CHECK(info.netns.netns_ino == 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + prog_id = link_info_prog_id(link[1], &info); CHECK(prog_ids[1] != prog_id, "bpf_prog_query", "invalid program #1 id on query: %u != %u\n", prog_ids[1], prog_id); - prog_id = link_info_prog_id(link[2]); + CHECK(info.netns.netns_ino == 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + prog_id = link_info_prog_id(link[2], &info); CHECK(prog_ids[2] != prog_id, "bpf_prog_query", "invalid program #2 id on query: %u != %u\n", prog_ids[2], prog_id); + CHECK(info.netns.netns_ino == 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); + + err = bpf_link__detach(link[0]); + if (CHECK(err, "link_detach", "failed %d\n", err)) + goto detach; + + /* prog id is still there, but netns_ino is zeroed out */ + prog_id = link_info_prog_id(link[0], &info); + CHECK(prog_ids[0] != prog_id, "bpf_prog_query", + "invalid program #0 id on query: %u != %u\n", + prog_ids[0], prog_id); + CHECK(info.netns.netns_ino != 0, "netns_ino", + "unexpected netns_ino: %u\n", info.netns.netns_ino); detach: if (link[2]) @@ -559,7 +555,8 @@ close: static void run_lookup_prog(const struct test *t) { - int client_fd, server_fds[MAX_SERVERS] = { -1 }; + int server_fds[MAX_SERVERS] = { -1 }; + int client_fd, reuse_conn_fd = -1; struct bpf_link *lookup_link; int i, err; @@ -583,6 +580,32 @@ static void run_lookup_prog(const struct test *t) break; } + /* Regular UDP socket lookup with reuseport behaves + * differently when reuseport group contains connected + * sockets. Check that adding a connected UDP socket to the + * reuseport group does not affect how reuseport works with + * BPF socket lookup. 
+ */ + if (t->reuseport_has_conns) { + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + + /* Add an extra socket to reuseport group */ + reuse_conn_fd = make_server(t->sotype, t->listen_at.ip, + t->listen_at.port, + t->reuseport_prog); + if (reuse_conn_fd < 0) + goto close; + + /* Connect the extra socket to itself */ + err = getsockname(reuse_conn_fd, (void *)&addr, &len); + if (CHECK(err, "getsockname", "errno %d\n", errno)) + goto close; + err = connect(reuse_conn_fd, (void *)&addr, len); + if (CHECK(err, "connect", "errno %d\n", errno)) + goto close; + } + client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port); if (client_fd < 0) goto close; @@ -594,6 +617,8 @@ static void run_lookup_prog(const struct test *t) close(client_fd); close: + if (reuse_conn_fd != -1) + close(reuse_conn_fd); for (i = 0; i < ARRAY_SIZE(server_fds); i++) { if (server_fds[i] != -1) close(server_fds[i]); @@ -711,6 +736,17 @@ static void test_redirect_lookup(struct test_sk_lookup *skel) .accept_on = SERVER_B, }, { + .desc = "UDP IPv4 redir and reuseport with conns", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { INT_IP4, INT_PORT }, + .accept_on = SERVER_B, + .reuseport_has_conns = true, + }, + { .desc = "UDP IPv4 redir skip reuseport", .lookup_prog = skel->progs.select_sock_a_no_reuseport, .reuseport_prog = skel->progs.select_sock_b, @@ -755,6 +791,17 @@ static void test_redirect_lookup(struct test_sk_lookup *skel) .accept_on = SERVER_B, }, { + .desc = "UDP IPv6 redir and reuseport with conns", + .lookup_prog = skel->progs.select_sock_a, + .reuseport_prog = skel->progs.select_sock_b, + .sock_map = skel->maps.redir_map, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { INT_IP6, INT_PORT }, + .accept_on = SERVER_B, + .reuseport_has_conns = true, + }, + { .desc = "UDP IPv6 redir skip reuseport", .lookup_prog = skel->progs.select_sock_a_no_reuseport, .reuseport_prog = skel->progs.select_sock_b, @@ -1238,8 +1285,8 @@ static void run_tests(struct test_sk_lookup *skel) static int switch_netns(void) { static const char * const setup_script[] = { - "ip -6 addr add dev lo " EXT_IP6 "/128 nodad", - "ip -6 addr add dev lo " INT_IP6 "/128 nodad", + "ip -6 addr add dev lo " EXT_IP6 "/128", + "ip -6 addr add dev lo " INT_IP6 "/128", "ip link set dev lo up", NULL, }; diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index 7021b92af313..25de86af2d03 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -11,6 +11,7 @@ void test_skb_ctx(void) .cb[3] = 4, .cb[4] = 5, .priority = 6, + .ifindex = 1, .tstamp = 7, .wire_len = 100, .gso_segs = 8, @@ -92,6 +93,10 @@ void test_skb_ctx(void) "ctx_out_priority", "skb->priority == %d, expected %d\n", skb.priority, 7); + CHECK_ATTR(skb.ifindex != 1, + "ctx_out_ifindex", + "skb->ifindex == %d, expected %d\n", + skb.ifindex, 1); CHECK_ATTR(skb.tstamp != 8, "ctx_out_tstamp", "skb->tstamp == %lld, expected %d\n", diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c new file mode 100644 index 000000000000..6f814999b395 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include 
<uapi/linux/if_link.h> +#include <test_progs.h> +#include "test_xdp_link.skel.h" + +#define IFINDEX_LO 1 + +void test_xdp_link(void) +{ + __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; + DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); + struct test_xdp_link *skel1 = NULL, *skel2 = NULL; + struct bpf_link_info link_info; + struct bpf_prog_info prog_info; + struct bpf_link *link; + __u32 link_info_len = sizeof(link_info); + __u32 prog_info_len = sizeof(prog_info); + + skel1 = test_xdp_link__open_and_load(); + if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) + goto cleanup; + prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); + + skel2 = test_xdp_link__open_and_load(); + if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) + goto cleanup; + prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); + + memset(&prog_info, 0, sizeof(prog_info)); + err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); + if (CHECK(err, "fd_info1", "failed %d\n", -errno)) + goto cleanup; + id1 = prog_info.id; + + memset(&prog_info, 0, sizeof(prog_info)); + err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); + if (CHECK(err, "fd_info2", "failed %d\n", -errno)) + goto cleanup; + id2 = prog_info.id; + + /* set initial prog attachment */ + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); + if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) + goto cleanup; + + /* validate prog ID */ + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err); + + /* BPF link is not allowed to replace prog attachment */ + link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); + if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + bpf_link__destroy(link); + /* best-effort detach prog */ + opts.old_fd = prog_fd1; + bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + goto cleanup; + } + + /* detach BPF program */ + opts.old_fd = prog_fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); + if (CHECK(err, "prog_detach", "failed %d\n", err)) + goto cleanup; + + /* now BPF link should attach successfully */ + link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); + if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + goto cleanup; + skel1->links.xdp_handler = link; + + /* validate prog ID */ + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err)) + goto cleanup; + + /* BPF prog attach is not allowed to replace BPF link */ + opts.old_fd = prog_fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); + if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) + goto cleanup; + + /* Can't force-update when BPF link is active */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); + if (CHECK(!err, "prog_update_fail", "unexpected success\n")) + goto cleanup; + + /* Can't force-detach when BPF link is active */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); + if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) + goto cleanup; + + /* BPF link is not allowed to replace another BPF link */ + link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); + if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + bpf_link__destroy(link); + goto cleanup; 
+ } + + bpf_link__destroy(skel1->links.xdp_handler); + skel1->links.xdp_handler = NULL; + + /* new link attach should succeed */ + link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); + if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + goto cleanup; + skel2->links.xdp_handler = link; + + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != id2, "id2_check", + "loaded prog id %u != id2 %u, err %d", id0, id1, err)) + goto cleanup; + + /* updating program under active BPF link works as expected */ + err = bpf_link__update_program(link, skel1->progs.xdp_handler); + if (CHECK(err, "link_upd", "failed: %d\n", err)) + goto cleanup; + + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto cleanup; + + CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", + "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); + CHECK(link_info.prog_id != id1, "link_prog_id", + "got %u != exp %u\n", link_info.prog_id, id1); + CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + + err = bpf_link__detach(link); + if (CHECK(err, "link_detach", "failed %d\n", err)) + goto cleanup; + + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto cleanup; + CHECK(link_info.prog_id != id1, "link_prog_id", + "got %u != exp %u\n", link_info.prog_id, id1); + /* ifindex should be zeroed out */ + CHECK(link_info.xdp.ifindex != 0, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, 0); + +cleanup: + test_xdp_link__destroy(skel1); + test_xdp_link__destroy(skel2); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 17db3bac518b..c196280df90d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -11,6 +11,8 @@ #define tcp6_sock tcp6_sock___not_used #define bpf_iter__udp bpf_iter__udp___not_used #define udp6_sock udp6_sock___not_used +#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used +#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -22,6 +24,8 @@ #undef tcp6_sock #undef bpf_iter__udp #undef udp6_sock +#undef bpf_iter__bpf_map_elem +#undef bpf_iter__bpf_sk_storage_map struct bpf_iter_meta { struct seq_file *seq; @@ -78,3 +82,17 @@ struct udp6_sock { struct udp_sock udp; struct ipv6_pinfo inet6; } __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map_elem { + struct bpf_iter_meta *meta; + struct bpf_map *map; + void *key; + void *value; +}; + +struct bpf_iter__bpf_sk_storage_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; + struct sock *sk; + void *value; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c new file mode 100644 index 000000000000..6286023fd62b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + 
__uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} arraymap1 SEC(".maps"); + +__u32 key_sum = 0; +__u64 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + __u64 *val = ctx->value; + + if (key == (void *)0 || val == (void *)0) + return 0; + + bpf_seq_write(ctx->meta->seq, key, sizeof(__u32)); + bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); + key_sum += *key; + val_sum += *val; + *val = *key; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c new file mode 100644 index 000000000000..07ddbfdbcab7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u64); +} hashmap1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, __u64); + __type(value, __u64); +} hashmap2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u32); +} hashmap3 SEC(".maps"); + +/* will set before prog run */ +bool in_test_mode = 0; + +/* will collect results during prog run */ +__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; +__u64 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + __u32 seq_num = ctx->meta->seq_num; + struct bpf_map *map = ctx->map; + struct key_t *key = ctx->key; + __u64 *val = ctx->value; + + if (in_test_mode) { + /* test mode is used by selftests to + * test functionality of bpf_hash_map iter. + * + * the above hashmap1 will have correct size + * and will be accepted, hashmap2 and hashmap3 + * should be rejected due to smaller key/value + * size. 
+ */ + if (key == (void *)0 || val == (void *)0) + return 0; + + key_sum_a += key->a; + key_sum_b += key->b; + key_sum_c += key->c; + val_sum += *val; + return 0; + } + + /* non-test mode, the map is prepared with the + * below bpftool command sequence: + * bpftool map create /sys/fs/bpf/m1 type hash \ + * key 12 value 8 entries 3 name map1 + * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \ + * value 0 0 0 1 0 0 0 1 + * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \ + * value 0 0 0 1 0 0 0 2 + * The bpftool iter command line: + * bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \ + * map id 77 + * The below output will be: + * map dump starts + * 77: (1000000 0 2000000) (200000001000000) + * 77: (1000000 0 1000000) (100000001000000) + * map dump ends + */ + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, "map dump starts\n"); + + if (key == (void *)0 || val == (void *)0) { + BPF_SEQ_PRINTF(seq, "map dump ends\n"); + return 0; + } + + BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id, + key->a, key->b, key->c, *val); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c new file mode 100644 index 000000000000..85fa710fad90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u32); +} arraymap1 SEC(".maps"); + +/* will set before prog run */ +volatile const __u32 num_cpus = 0; + +__u32 key_sum = 0, val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + void *pptr = ctx->value; + __u32 step; + int i; + + if (key == (void *)0 || pptr == (void *)0) + return 0; + + key_sum += *key; + + step = 8; + for (i = 0; i < num_cpus; i++) { + val_sum += *(__u32 *)pptr; + pptr += step; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c new file mode 100644 index 000000000000..feaaa2b89c57 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u32); +} hashmap1 SEC(".maps"); + +/* will set before prog run */ +volatile const __u32 num_cpus = 0; + +/* will collect results during prog run */ +__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; +__u32 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + struct key_t *key = ctx->key; + void *pptr = ctx->value; + __u32 step; + int i; + + if (key == (void *)0 || pptr == (void *)0) + return 0; + + key_sum_a += key->a; + key_sum_b += key->b; + key_sum_c += key->c; + + step = 8; + for (i = 0; i < num_cpus; i++) { + val_sum += *(__u32 *)pptr; + pptr += step; + } + 
return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c new file mode 100644 index 000000000000..6b70ccaba301 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +__u32 val_sum = 0; +__u32 ipv6_sk_count = 0; + +SEC("iter/bpf_sk_storage_map") +int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) +{ + struct sock *sk = ctx->sk; + __u32 *val = ctx->value; + + if (sk == (void *)0 || val == (void *)0) + return 0; + + if (sk->sk_family == AF_INET6) + ipv6_sk_count++; + + val_sum += *val; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c new file mode 100644 index 000000000000..e3a7575e81d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u64); +} hashmap1 SEC(".maps"); + +__u32 key_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + void *key = ctx->key; + + if (key == (void *)0) + return 0; + + /* out of bound access w.r.t. hashmap1 */ + key_sum += *(__u32 *)(key + sizeof(struct key_t)); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c new file mode 100644 index 000000000000..1c7304f56b1e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 value_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + void *value = ctx->value; + + if (value == (void *)0) + return 0; + + /* negative offset, verifier failure. 
*/ + value_sum += *(__u32 *)(value - 4); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h new file mode 100644 index 000000000000..a0778fe7857a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __PROGS_CG_STORAGE_MULTI_H +#define __PROGS_CG_STORAGE_MULTI_H + +#include <asm/types.h> + +struct cgroup_value { + __u32 egress_pkts; + __u32 ingress_pkts; +}; + +#endif diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c new file mode 100644 index 000000000000..44ad46b33539 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> + +#include "progs/cg_storage_multi.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct cgroup_value); +} cgroup_storage SEC(".maps"); + +__u32 invocations = 0; + +SEC("cgroup_skb/egress") +int egress(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c new file mode 100644 index 000000000000..a25373002055 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> + +#include "progs/cg_storage_multi.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, struct cgroup_value); +} cgroup_storage SEC(".maps"); + +__u32 invocations = 0; + +SEC("cgroup_skb/egress/1") +int egress1(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/egress/2") +int egress2(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/ingress") +int ingress(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c new file mode 100644 index 000000000000..a149f33bc533 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. 
+ */ + +#include <errno.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> + +#include "progs/cg_storage_multi.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, __u64); + __type(value, struct cgroup_value); +} cgroup_storage SEC(".maps"); + +__u32 invocations = 0; + +SEC("cgroup_skb/egress/1") +int egress1(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/egress/2") +int egress2(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} + +SEC("cgroup_skb/ingress") +int ingress(struct __sk_buff *skb) +{ + struct cgroup_value *ptr_cg_storage = + bpf_get_local_storage(&cgroup_storage, 0); + + __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1); + __sync_fetch_and_add(&invocations, 1); + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c new file mode 100644 index 000000000000..25467d13c356 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(stack_trace_t)); +} stackmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, stack_trace_t); +} stackdata_map SEC(".maps"); + +long stackid_kernel = 1; +long stackid_user = 1; +long stack_kernel = 1; +long stack_user = 1; + +SEC("perf_event") +int oncpu(void *ctx) +{ + stack_trace_t *trace; + __u32 key = 0; + long val; + + val = bpf_get_stackid(ctx, &stackmap, 0); + if (val > 0) + stackid_kernel = 2; + val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); + if (val > 0) + stackid_user = 2; + + trace = bpf_map_lookup_elem(&stackdata_map, &key); + if (!trace) + return 0; + + val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0); + if (val > 0) + stack_kernel = 2; + + val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK); + if (val > 0) + stack_user = 2; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c index 75c60c3c29cf..20861ec2f674 100644 --- a/tools/testing/selftests/bpf/progs/test_core_retro.c +++ b/tools/testing/selftests/bpf/progs/test_core_retro.c @@ -13,6 +13,13 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, int); +} exp_tgid_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); } results SEC(".maps"); SEC("tp/raw_syscalls/sys_enter") @@ -21,6 +28,12 @@ int handle_sys_enter(void *ctx) struct task_struct *task = (void *)bpf_get_current_task(); int tgid = BPF_CORE_READ(task, tgid); int zero = 0; + int real_tgid = bpf_get_current_pid_tgid() >> 32; + int *exp_tgid = 
bpf_map_lookup_elem(&exp_tgid_map, &zero); + + /* only pass through sys_enters from test process */ + if (!exp_tgid || *exp_tgid != real_tgid) + return 0; bpf_map_update_elem(&results, &zero, &tgid, 0); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c new file mode 100644 index 000000000000..eb93ea95d1d8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char LICENSE[] SEC("license") = "GPL"; + +SEC("xdp/handler") +int xdp_handler(struct xdp_md *xdp) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c index 8429b22525a7..165e3c2dd9a3 100644 --- a/tools/testing/selftests/bpf/progs/udp_limit.c +++ b/tools/testing/selftests/bpf/progs/udp_limit.c @@ -6,14 +6,28 @@ int invocations = 0, in_use = 0; +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_map SEC(".maps"); + SEC("cgroup/sock_create") int sock(struct bpf_sock *ctx) { + int *sk_storage; __u32 key; if (ctx->type != SOCK_DGRAM) return 1; + sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!sk_storage) + return 0; + *sk_storage = 0xdeadbeef; + __sync_fetch_and_add(&invocations, 1); if (in_use > 0) { @@ -31,11 +45,16 @@ int sock(struct bpf_sock *ctx) SEC("cgroup/sock_release") int sock_release(struct bpf_sock *ctx) { + int *sk_storage; __u32 key; if (ctx->type != SOCK_DGRAM) return 1; + sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0); + if (!sk_storage || *sk_storage != 0xdeadbeef) + return 0; + __sync_fetch_and_add(&invocations, 1); __sync_fetch_and_add(&in_use, -1); return 1; diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py index a53ed58528d6..bfff82be3fc1 100755 --- a/tools/testing/selftests/bpf/tcp_client.py +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -34,7 +34,7 @@ serverPort = int(sys.argv[1]) # create active socket sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) try: - sock.connect(('localhost', serverPort)) + sock.connect(('::1', serverPort)) except socket.error as e: sys.exit(1) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py index 0ca60d193bed..42ab8882f00f 100755 --- a/tools/testing/selftests/bpf/tcp_server.py +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -38,7 +38,7 @@ serverSocket = None # create passive socket serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) -try: serverSocket.bind(('localhost', 0)) +try: serverSocket.bind(('::1', 0)) except socket.error as msg: print('bind fails: ' + str(msg)) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 655729004391..d946252a25bb 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -74,22 +74,7 @@ int main(int argc, char **argv) goto out; } - if (setup_cgroup_environment()) { - printf("Failed to setup cgroup environment\n"); - goto err; - } - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (cgroup_fd < 0) { - printf("Failed to create test cgroup\n"); - goto err; - } - - if (join_cgroup(TEST_CGROUP)) { - printf("Failed to join cgroup\n"); 
- goto err; - } + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); /* Attach the bpf program */ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index d850fb9076b5..804dddd97d4c 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -33,21 +33,10 @@ int main(int argc, char **argv) goto out; } - if (setup_cgroup_environment()) { - printf("Failed to load DEV_CGROUP program\n"); - goto err; - } - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); - goto err; - } - - if (join_cgroup(TEST_CGROUP)) { - printf("Failed to join cgroup\n"); - goto err; + goto out; } /* Attach bpf program */ diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c index c1da5404454a..a7b9a69f4fd5 100644 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -58,22 +58,9 @@ int main(int argc, char **argv) goto out; } - if (setup_cgroup_environment()) { - printf("Failed to load bpf program\n"); - goto err; - } - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (cgroup_fd < 0) { - printf("Failed to create test cgroup\n"); + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + if (cgroup_fd < 0) goto err; - } - - if (join_cgroup(TEST_CGROUP)) { - printf("Failed to join cgroup\n"); - goto err; - } /* Attach bpf program */ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { @@ -82,9 +69,9 @@ int main(int argc, char **argv) } if (system("which ping6 &>/dev/null") == 0) - assert(!system("ping6 localhost -c 10000 -f -q > /dev/null")); + assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null")); else - assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null")); + assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null")); if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, &prog_cnt)) { diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index 356351c0ac28..4a64306728ab 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -160,16 +160,10 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CGROUP_PATH); + cgfd = cgroup_setup_and_join(CGROUP_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CGROUP_PATH)) - goto err; - if (send_packet(argv[1])) goto err; diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 52bf14955797..9613f7538840 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -464,16 +464,10 @@ int main(int argc, char **argv) int cgfd = -1; int err = 0; - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 0358814c67dc..b8c72c1d9cf7 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ 
b/tools/testing/selftests/bpf/test_sock_addr.c @@ -1638,16 +1638,10 @@ int main(int argc, char **argv) exit(err); } - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index f0fc103261a4..6c9f269c396d 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -421,19 +421,11 @@ int main(int argc, char **argv) struct bpf_object *obj; struct bpf_map *map; - err = setup_cgroup_environment(); - CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d", - err, errno); - - atexit(cleanup_cgroup_environment); - /* Create a cgroup, get fd, and join it */ - cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - CHECK(cgroup_fd == -1, "create_and_get_cgroup()", + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", "cgroup_fd:%d errno:%d", cgroup_fd, errno); - - err = join_cgroup(TEST_CGROUP); - CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); + atexit(cleanup_cgroup_environment); err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 15653b0e26eb..154a8fd2a48d 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -191,16 +191,10 @@ int main(int argc, char **argv) int cgfd = -1; int err = 0; - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_test(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 78789b27e573..9b6fb00dc7a0 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1963,23 +1963,9 @@ int main(int argc, char **argv) } if (!cg_fd) { - if (setup_cgroup_environment()) { - fprintf(stderr, "ERROR: cgroup env failed\n"); - return -EINVAL; - } - - cg_fd = create_and_get_cgroup(CG_PATH); - if (cg_fd < 0) { - fprintf(stderr, - "ERROR: (%i) open cg path failed: %s\n", - cg_fd, strerror(errno)); + cg_fd = cgroup_setup_and_join(CG_PATH); + if (cg_fd < 0) return cg_fd; - } - - if (join_cgroup(CG_PATH)) { - fprintf(stderr, "ERROR: failed to join cgroup\n"); - return -EINVAL; - } cg_created = 1; } diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index d196e2a4a6e0..a20a919244c0 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -1619,16 +1619,10 @@ int main(int argc, char **argv) int cgfd = -1; int err = 0; - if (setup_cgroup_environment()) - goto err; - - cgfd = create_and_get_cgroup(CG_PATH); + cgfd = cgroup_setup_and_join(CG_PATH); if (cgfd < 0) goto err; - if (join_cgroup(CG_PATH)) - goto err; - if (run_tests(cgfd)) goto err; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 3ae127620463..74a9e49988b6 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -102,16 +102,10 @@ int 
main(int argc, char **argv) __u32 key = 0; int rv; - if (setup_cgroup_environment()) - goto err; - - cg_fd = create_and_get_cgroup(cg_path); + cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; - if (join_cgroup(cg_path)) - goto err; - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index f9765ddf0761..8549b31716ab 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -86,16 +86,10 @@ int main(int argc, char **argv) CPU_SET(0, &cpuset); pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - if (setup_cgroup_environment()) - goto err; - - cg_fd = create_and_get_cgroup(cg_path); + cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; - if (join_cgroup(cg_path)) - goto err; - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index c4b17e08d431..dd80f0c84afb 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -10,52 +10,72 @@ # | xdp forwarding | # ------------------ -cleanup() +ret=0 + +setup() { - if [ "$?" = "0" ]; then - echo "selftests: test_xdp_redirect [PASS]"; - else - echo "selftests: test_xdp_redirect [FAILED]"; - fi - set +e + local xdpmode=$1 + + ip netns add ns1 + ip netns add ns2 + + ip link add veth1 index 111 type veth peer name veth11 netns ns1 + ip link add veth2 index 222 type veth peer name veth22 netns ns2 + + ip link set veth1 up + ip link set veth2 up + ip -n ns1 link set dev veth11 up + ip -n ns2 link set dev veth22 up + + ip -n ns1 addr add 10.1.1.11/24 dev veth11 + ip -n ns2 addr add 10.1.1.22/24 dev veth22 +} + +cleanup() +{ ip link del veth1 2> /dev/null ip link del veth2 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } -ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null -if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" - exit 0 -fi -set -e - -ip netns add ns1 -ip netns add ns2 +test_xdp_redirect() +{ + local xdpmode=$1 -trap cleanup 0 2 3 6 9 + setup -ip link add veth1 index 111 type veth peer name veth11 -ip link add veth2 index 222 type veth peer name veth22 + ip link set dev veth1 $xdpmode off &> /dev/null + if [ $? -ne 0 ];then + echo "selftests: test_xdp_redirect $xdpmode [SKIP]" + return 0 + fi -ip link set veth11 netns ns1 -ip link set veth22 netns ns2 + ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null + ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null + ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null + ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null -ip link set veth1 up -ip link set veth2 up + ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null + local ret1=$? + ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null + local ret2=$? 
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11 -ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22 + if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then + echo "selftests: test_xdp_redirect $xdpmode [PASS]"; + else + ret=1 + echo "selftests: test_xdp_redirect $xdpmode [FAILED]"; + fi -ip netns exec ns1 ip link set dev veth11 up -ip netns exec ns2 ip link set dev veth22 up + cleanup +} -ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222 -ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111 +set -e +trap cleanup 2 3 6 9 -ip netns exec ns1 ping -c 1 10.1.1.22 -ip netns exec ns2 ping -c 1 10.1.1.11 +test_xdp_redirect xdpgeneric +test_xdp_redirect xdpdrv -exit 0 +exit $ret diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 0af6337a8962..800d503e5cb4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -64,3 +64,17 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) return 0; } + +__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) +{ + __u32 info_len = sizeof(*info); + int err; + + memset(info, 0, sizeof(*info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len); + if (err) { + printf("failed to get link info: %d\n", -errno); + return 0; + } + return info->prog_id; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 923b51762759..d4f8e749611b 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (C) 2020 Facebook, Inc. */ #include <stdbool.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> int parse_num_list(const char *s, bool **set, int *set_len); +__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); |
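
For reference, below is a minimal sketch of how a selftest could consume the new link_info_prog_id() helper added in testing_helpers.c to confirm that a BPF link is still bound to the expected program. The attach call (bpf_program__attach_xdp), the function name check_xdp_link and the ifindex parameter are assumptions made for illustration; only the helper itself comes from the patch above.

/* Illustrative only: verify that a bpf_link points at the program we expect,
 * using link_info_prog_id() from testing_helpers.c. The attach step
 * (bpf_program__attach_xdp) is an assumption for this example, not something
 * introduced by the patch.
 */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "testing_helpers.h"

static int check_xdp_link(struct bpf_program *prog, int ifindex)
{
	struct bpf_prog_info prog_info = {};
	__u32 prog_info_len = sizeof(prog_info);
	struct bpf_link_info link_info;
	struct bpf_link *link;
	int err;

	link = bpf_program__attach_xdp(prog, ifindex);
	if (libbpf_get_error(link))
		return -1;

	/* id of the program that was just attached */
	err = bpf_obj_get_info_by_fd(bpf_program__fd(prog), &prog_info,
				     &prog_info_len);
	if (err)
		goto out;

	/* link_info_prog_id() returns 0 if querying the link failed */
	err = link_info_prog_id(link, &link_info) == prog_info.id ? 0 : -1;
out:
	bpf_link__destroy(link);
	return err;
}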