summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/index.rst21
-rw-r--r--Documentation/bpf/map_cgroup_storage.rst169
-rw-r--r--Documentation/networking/filter.rst2
-rw-r--r--arch/arm64/include/asm/extable.h12
-rw-r--r--arch/arm64/mm/extable.c12
-rw-r--r--arch/arm64/net/bpf_jit_comp.c93
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c4
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c3
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c6
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c5
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c18
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c4
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_filter.c3
-rw-r--r--drivers/net/ethernet/sfc/efx.c5
-rw-r--r--drivers/net/ethernet/socionext/netsec.c3
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.c3
-rw-r--r--drivers/net/hyperv/netvsc_bpf.c21
-rw-r--r--drivers/net/netdevsim/bpf.c4
-rw-r--r--drivers/net/netdevsim/netdevsim.h2
-rw-r--r--drivers/net/tun.c15
-rw-r--r--drivers/net/veth.c15
-rw-r--r--drivers/net/virtio_net.c17
-rw-r--r--drivers/net/xen-netfront.c21
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--include/linux/bpf-cgroup.h12
-rw-r--r--include/linux/bpf.h127
-rw-r--r--include/linux/filter.h3
-rw-r--r--include/linux/netdevice.h29
-rw-r--r--include/linux/proc_fs.h3
-rw-r--r--include/net/xdp.h2
-rw-r--r--include/uapi/linux/bpf.h24
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arraymap.c138
-rw-r--r--kernel/bpf/bpf_iter.c85
-rw-r--r--kernel/bpf/btf.c13
-rw-r--r--kernel/bpf/cgroup.c82
-rw-r--r--kernel/bpf/core.c12
-rw-r--r--kernel/bpf/hashtab.c194
-rw-r--r--kernel/bpf/local_storage.c216
-rw-r--r--kernel/bpf/map_iter.c78
-rw-r--r--kernel/bpf/net_namespace.c8
-rw-r--r--kernel/bpf/prog_iter.c107
-rw-r--r--kernel/bpf/stackmap.c183
-rw-r--r--kernel/bpf/syscall.c52
-rw-r--r--kernel/bpf/task_iter.c24
-rw-r--r--kernel/bpf/verifier.c96
-rw-r--r--kernel/events/core.c18
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--net/bpf/test_run.c43
-rw-r--r--net/core/bpf_sk_storage.c216
-rw-r--r--net/core/dev.c539
-rw-r--r--net/core/filter.c3
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/xdp.c9
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/udp.c14
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/udp.c5
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst18
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst8
-rw-r--r--tools/bpf/bpftool/Makefile3
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool22
-rw-r--r--tools/bpf/bpftool/btf.c56
-rw-r--r--tools/bpf/bpftool/feature.c8
-rw-r--r--tools/bpf/bpftool/iter.c33
-rw-r--r--tools/bpf/bpftool/link.c37
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/bpf/resolve_btfids/.gitignore4
-rw-r--r--tools/bpf/resolve_btfids/main.c58
-rw-r--r--tools/build/Build.include3
-rw-r--r--tools/include/uapi/linux/bpf.h24
-rw-r--r--tools/lib/bpf/bpf.c11
-rw-r--r--tools/lib/bpf/bpf.h5
-rw-r--r--tools/lib/bpf/bpf_tracing.h4
-rw-r--r--tools/lib/bpf/btf.c118
-rw-r--r--tools/lib/bpf/btf.h5
-rw-r--r--tools/lib/bpf/btf_dump.c2
-rw-r--r--tools/lib/bpf/libbpf.c36
-rw-r--r--tools/lib/bpf/libbpf.h6
-rw-r--r--tools/lib/bpf/libbpf.map5
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c455
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c417
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_retro.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c109
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c151
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c40
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c100
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c46
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c50
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c34
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c35
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi.h13
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c33
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c57
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c57
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_retro.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c12
-rw-r--r--tools/testing/selftests/bpf/progs/udp_limit.c19
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py2
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py2
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c17
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c15
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c21
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c8
-rw-r--r--tools/testing/selftests/bpf/test_sock_fields.c14
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c8
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c18
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c8
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c8
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh84
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h3
135 files changed, 4603 insertions, 1013 deletions
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 38b4db8be7a2..44ca8ea5a723 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -5,10 +5,10 @@ BPF Documentation
This directory contains documentation for the BPF (Berkeley Packet
Filter) facility, with a focus on the extended BPF version (eBPF).
-This kernel side documentation is still work in progress. The main
+This kernel side documentation is still work in progress. The main
textual documentation is (for historical reasons) described in
-`Documentation/networking/filter.rst`_, which describe both classical
-and extended BPF instruction-set.
+:ref:`networking-filter`, which describes both classical and extended
+BPF instruction-set.
The Cilium project also maintains a `BPF and XDP Reference Guide`_
that goes into great technical depth about the BPF Architecture.
@@ -48,6 +48,15 @@ Program types
bpf_lsm
+Map types
+=========
+
+.. toctree::
+ :maxdepth: 1
+
+ map_cgroup_storage
+
+
Testing and debugging BPF
=========================
@@ -59,7 +68,7 @@ Testing and debugging BPF
.. Links:
-.. _Documentation/networking/filter.rst: ../networking/filter.txt
+.. _networking-filter: ../networking/filter.rst
.. _man-pages: https://www.kernel.org/doc/man-pages/
-.. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html
-.. _BPF and XDP Reference Guide: http://cilium.readthedocs.io/en/latest/bpf/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
+.. _BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/
diff --git a/Documentation/bpf/map_cgroup_storage.rst b/Documentation/bpf/map_cgroup_storage.rst
new file mode 100644
index 000000000000..cab9543017bf
--- /dev/null
+++ b/Documentation/bpf/map_cgroup_storage.rst
@@ -0,0 +1,169 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2020 Google LLC.
+
+===========================
+BPF_MAP_TYPE_CGROUP_STORAGE
+===========================
+
+The ``BPF_MAP_TYPE_CGROUP_STORAGE`` map type represents a local fixed-size
+storage. It is only available with ``CONFIG_CGROUP_BPF``, and to programs that
+attach to cgroups; the programs are made available by the same Kconfig. The
+storage is identified by the cgroup the program is attached to.
+
+The map provides a local storage at the cgroup that the BPF program is attached
+to. It provides faster and simpler access than the general-purpose hash
+table, which performs hash table lookups and requires users to track live
+cgroups on their own.
+
+This document describes the usage and semantics of the
+``BPF_MAP_TYPE_CGROUP_STORAGE`` map type. Some of its behaviors were changed in
+Linux 5.9 and this document will describe the differences.
+
+Usage
+=====
+
+The map uses a key of type either ``__u64 cgroup_inode_id`` or
+``struct bpf_cgroup_storage_key``, declared in ``linux/bpf.h``::
+
+ struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id;
+ __u32 attach_type;
+ };
+
+``cgroup_inode_id`` is the inode id of the cgroup directory.
+``attach_type`` is the program's attach type.
+
+Linux 5.9 added support for type ``__u64 cgroup_inode_id`` as the key type.
+When this key type is used, then all attach types of the particular cgroup and
+map will share the same storage. Otherwise, if the type is
+``struct bpf_cgroup_storage_key``, then programs of different attach types
+will be isolated and see different storages.
+
+To access the storage in a program, use ``bpf_get_local_storage``::
+
+ void *bpf_get_local_storage(void *map, u64 flags)
+
+``flags`` is reserved for future use and must be 0.
+
+There is no implicit synchronization. Storages of ``BPF_MAP_TYPE_CGROUP_STORAGE``
+can be accessed by multiple programs across different CPUs, and users should
+take care of synchronization by themselves. The bpf infrastructure provides
+``struct bpf_spin_lock`` to synchronize the storage. See
+``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
+
+Examples
+========
+
+Usage with key type as ``struct bpf_cgroup_storage_key``::
+
+ #include <bpf/bpf.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+ } cgroup_storage SEC(".maps");
+
+ int program(struct __sk_buff *skb)
+ {
+ __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+ __sync_fetch_and_add(ptr, 1);
+
+ return 0;
+ }
+
+Userspace accessing map declared above::
+
+ #include <linux/bpf.h>
+ #include <linux/libbpf.h>
+
+ __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type)
+ {
+ struct bpf_cgroup_storage_key key = {
+ .cgroup_inode_id = cgrp,
+ .attach_type = type,
+ };
+ __u32 value;
+ bpf_map_lookup_elem(bpf_map__fd(map), &key, &value);
+ // error checking omitted
+ return value;
+ }
+
+Alternatively, using just ``__u64 cgroup_inode_id`` as key type::
+
+ #include <bpf/bpf.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, __u32);
+ } cgroup_storage SEC(".maps");
+
+ int program(struct __sk_buff *skb)
+ {
+ __u32 *ptr = bpf_get_local_storage(&cgroup_storage, 0);
+ __sync_fetch_and_add(ptr, 1);
+
+ return 0;
+ }
+
+And userspace::
+
+ #include <linux/bpf.h>
+ #include <linux/libbpf.h>
+
+ __u32 map_lookup(struct bpf_map *map, __u64 cgrp, enum bpf_attach_type type)
+ {
+ __u32 value;
+ bpf_map_lookup_elem(bpf_map__fd(map), &cgrp, &value);
+ // error checking omitted
+ return value;
+ }
+
+Semantics
+=========
+
+``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE`` is a variant of this map type. This
+per-CPU variant will have different memory regions for each CPU for each
+storage. The non-per-CPU will have the same memory region for each storage.
+
+Prior to Linux 5.9, the lifetime of a storage is precisely per-attachment, and
+for a single ``CGROUP_STORAGE`` map, there can be at most one program loaded
+that uses the map. A program may be attached to multiple cgroups or have
+multiple attach types, and each attach creates a fresh zeroed storage. The
+storage is freed upon detach.
+
+There is a one-to-one association between the map of each type (per-CPU and
+non-per-CPU) and the BPF program during load verification time. As a result,
+each map can only be used by one BPF program and each BPF program can only use
+one storage map of each type. Because each map can only be used by one BPF
+program, sharing of this cgroup's storage with other BPF programs was
+impossible.
+
+Since Linux 5.9, storage can be shared by multiple programs. When a program is
+attached to a cgroup, the kernel would create a new storage only if the map
+does not already contain an entry for the cgroup and attach type pair, or else
+the old storage is reused for the new attachment. If the map is attach type
+shared, then attach type is simply ignored during comparison. Storage is freed
+only when either the map or the cgroup attached to is being freed. Detaching
+will not directly free the storage, but it may cause the reference to the map
+to reach zero and indirectly free all storage in the map.
+
+The map is not associated with any BPF program, thus making sharing possible.
+However, the BPF program can still only associate with one map of each type
+(per-CPU and non-per-CPU). A BPF program cannot use more than one
+``BPF_MAP_TYPE_CGROUP_STORAGE`` or more than one
+``BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE``.
+
+In all versions, userspace may use the attach parameters of cgroup and
+attach type pair in ``struct bpf_cgroup_storage_key`` as the key to the BPF map
+APIs to read or update the storage for a given attachment. For Linux 5.9
+attach type shared storages, only the first value in the struct, cgroup inode
+id, is used during comparison, so userspace may just specify a ``__u64``
+directly.
+
+The storage is bound at attach time. Even if the program is attached to parent
+and triggers in child, the storage still belongs to the parent.
+
+Userspace cannot create a new entry in the map or delete an existing entry.
+Program test runs always use a temporary storage.
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index a1d3e192b9fa..debb59e374de 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
+.. _networking-filter:
+
=======================================================
Linux Socket Filtering aka Berkeley Packet Filter (BPF)
=======================================================
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 56a4f68b262e..840a35ed92ec 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -22,5 +22,17 @@ struct exception_table_entry
#define ARCH_HAS_RELATIVE_EXTABLE
+#ifdef CONFIG_BPF_JIT
+int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs);
+#else /* !CONFIG_BPF_JIT */
+static inline
+int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return 0;
+}
+#endif /* !CONFIG_BPF_JIT */
+
extern int fixup_exception(struct pt_regs *regs);
#endif
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 81e694af5f8c..eee1732ab6cd 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -11,8 +11,14 @@ int fixup_exception(struct pt_regs *regs)
const struct exception_table_entry *fixup;
fixup = search_exception_tables(instruction_pointer(regs));
- if (fixup)
- regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
+ if (!fixup)
+ return 0;
- return fixup != NULL;
+ if (IS_ENABLED(CONFIG_BPF_JIT) &&
+ regs->pc >= BPF_JIT_REGION_START &&
+ regs->pc < BPF_JIT_REGION_END)
+ return arm64_bpf_fixup_exception(fixup, regs);
+
+ regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
+ return 1;
}
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 3cb25b43b368..f8912e45be7a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "bpf_jit: " fmt
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/printk.h>
@@ -56,6 +57,7 @@ struct jit_ctx {
int idx;
int epilogue_offset;
int *offset;
+ int exentry_idx;
__le32 *image;
u32 stack_size;
};
@@ -351,6 +353,67 @@ static void build_epilogue(struct jit_ctx *ctx)
emit(A64_RET(A64_LR), ctx);
}
+#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
+#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
+
+int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+ int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+
+ regs->regs[dst_reg] = 0;
+ regs->pc = (unsigned long)&ex->fixup - offset;
+ return 1;
+}
+
+/* For accesses to BTF pointers, add an entry to the exception table */
+static int add_exception_handler(const struct bpf_insn *insn,
+ struct jit_ctx *ctx,
+ int dst_reg)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex;
+
+ if (!ctx->image)
+ /* First pass */
+ return 0;
+
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM)
+ return 0;
+
+ if (!ctx->prog->aux->extable ||
+ WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
+ return -EINVAL;
+
+ ex = &ctx->prog->aux->extable[ctx->exentry_idx];
+ pc = (unsigned long)&ctx->image[ctx->idx - 1];
+
+ offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->insn = offset;
+
+ /*
+ * Since the extable follows the program, the fixup offset is always
+ * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
+ * to keep things simple, and put the destination register in the upper
+ * bits. We don't need to worry about buildtime or runtime sort
+ * modifying the upper bits because the table is already sorted, and
+ * isn't part of the main exception table.
+ */
+ offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
+ if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+ return -ERANGE;
+
+ ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+ FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+
+ ctx->exentry_idx++;
+ return 0;
+}
+
/* JITs an eBPF instruction.
* Returns:
* 0 - successfully JITed an 8-byte eBPF instruction.
@@ -375,6 +438,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
u8 jmp_cond, reg;
s32 jmp_offset;
u32 a64_insn;
+ int ret;
#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -694,7 +758,6 @@ emit_cond_jmp:
const u8 r0 = bpf2a64[BPF_REG_0];
bool func_addr_fixed;
u64 func_addr;
- int ret;
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&func_addr, &func_addr_fixed);
@@ -738,6 +801,10 @@ emit_cond_jmp:
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
emit_a64_mov_i(1, tmp, off, ctx);
switch (BPF_SIZE(code)) {
case BPF_W:
@@ -753,6 +820,10 @@ emit_cond_jmp:
emit(A64_LDR64(dst, src, tmp), ctx);
break;
}
+
+ ret = add_exception_handler(insn, ctx, dst);
+ if (ret)
+ return ret;
break;
/* ST: *(size *)(dst + off) = imm */
@@ -868,6 +939,9 @@ static int validate_code(struct jit_ctx *ctx)
return -1;
}
+ if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
+ return -1;
+
return 0;
}
@@ -884,6 +958,7 @@ struct arm64_jit_data {
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ int image_size, prog_size, extable_size;
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
@@ -891,7 +966,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
- int image_size;
u8 *image_ptr;
if (!prog->jit_requested)
@@ -922,7 +996,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
image_ptr = jit_data->image;
header = jit_data->header;
extra_pass = true;
- image_size = sizeof(u32) * ctx.idx;
+ prog_size = sizeof(u32) * ctx.idx;
goto skip_init_ctx;
}
memset(&ctx, 0, sizeof(ctx));
@@ -950,8 +1024,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.epilogue_offset = ctx.idx;
build_epilogue(&ctx);
+ extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+
/* Now we know the actual image size. */
- image_size = sizeof(u32) * ctx.idx;
+ prog_size = sizeof(u32) * ctx.idx;
+ image_size = prog_size + extable_size;
header = bpf_jit_binary_alloc(image_size, &image_ptr,
sizeof(u32), jit_fill_hole);
if (header == NULL) {
@@ -962,8 +1040,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */
ctx.image = (__le32 *)image_ptr;
+ if (extable_size)
+ prog->aux->extable = (void *)image_ptr + prog_size;
skip_init_ctx:
ctx.idx = 0;
+ ctx.exentry_idx = 0;
build_prologue(&ctx, was_classic);
@@ -984,7 +1065,7 @@ skip_init_ctx:
/* And we're done. */
if (bpf_jit_enable > 1)
- bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+ bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
bpf_flush_icache(header, ctx.image + ctx.idx);
@@ -1005,7 +1086,7 @@ skip_init_ctx:
}
prog->bpf_func = (void *)ctx.image;
prog->jited = 1;
- prog->jited_len = image_size;
+ prog->jited_len = prog_size;
if (!prog->is_func || extra_pass) {
bpf_prog_fill_jited_linfo(prog, ctx.offset);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6478c1e0d137..2a6c9725e092 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -576,15 +576,9 @@ static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
*/
static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
{
- struct ena_adapter *adapter = netdev_priv(netdev);
-
switch (bpf->command) {
case XDP_SETUP_PROG:
return ena_xdp_set(netdev, bpf);
- case XDP_QUERY_PROG:
- bpf->prog_id = adapter->xdp_bpf_prog ?
- adapter->xdp_bpf_prog->aux->id : 0;
- break;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 5e3b4a3b69ea..2704a4709bc7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -330,10 +330,6 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
case XDP_SETUP_PROG:
rc = bnxt_xdp_set(bp, xdp->prog);
break;
- case XDP_QUERY_PROG:
- xdp->prog_id = bp->xdp_prog ? bp->xdp_prog->aux->id : 0;
- rc = 0;
- break;
default:
rc = -EINVAL;
break;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 2ba0ce115e63..1c6163934e20 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1906,9 +1906,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return nicvf_xdp_setup(nic, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 50f52fe2012f..83b1e974bff0 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2077,14 +2077,9 @@ out_err:
static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct dpaa2_eth_priv *priv = netdev_priv(dev);
-
switch (xdp->command) {
case XDP_SETUP_PROG:
return setup_xdp(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
- break;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index dadbfb3d2a2b..d8315811cbdf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12923,9 +12923,6 @@ static int i40e_xdp(struct net_device *dev,
switch (xdp->command) {
case XDP_SETUP_PROG:
return i40e_xdp_setup(vsi, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
- return 0;
case XDP_SETUP_XSK_UMEM:
return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 22e3d32463f1..8437d72795b0 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2549,9 +2549,6 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
- return 0;
case XDP_SETUP_XSK_UMEM:
return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e339edd0b593..2f8a4cfc5fa1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10160,10 +10160,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return ixgbe_xdp_setup(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = adapter->xdp_prog ?
- adapter->xdp_prog->aux->id : 0;
- return 0;
case XDP_SETUP_XSK_UMEM:
return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index c3d26cc0cf51..a428113e6d54 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4477,15 +4477,9 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct ixgbevf_adapter *adapter = netdev_priv(dev);
-
switch (xdp->command) {
case XDP_SETUP_PROG:
return ixgbevf_xdp_setup(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = adapter->xdp_prog ?
- adapter->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c9b6b0f85bb0..832bbb8b05c8 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4442,14 +4442,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct mvneta_port *pp = netdev_priv(dev);
-
switch (xdp->command) {
case XDP_SETUP_PROG:
return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 4298a029be55..2a8a5842eaef 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4657,9 +4657,6 @@ static int mvpp2_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return mvpp2_xdp_setup(port, xdp);
- case XDP_QUERY_PROG:
- xdp->prog_id = port->xdp_prog ? port->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 2b8608f8f0a9..106513f772c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2802,35 +2802,11 @@ unlock_out:
return err;
}
-static u32 mlx4_xdp_query(struct net_device *dev)
-{
- struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_dev *mdev = priv->mdev;
- const struct bpf_prog *xdp_prog;
- u32 prog_id = 0;
-
- if (!priv->tx_ring_num[TX_XDP])
- return prog_id;
-
- mutex_lock(&mdev->state_lock);
- xdp_prog = rcu_dereference_protected(
- priv->rx_ring[0]->xdp_prog,
- lockdep_is_held(&mdev->state_lock));
- if (xdp_prog)
- prog_id = xdp_prog->aux->id;
- mutex_unlock(&mdev->state_lock);
-
- return prog_id;
-}
-
static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return mlx4_xdp_set(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = mlx4_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8f26cd951ff5..aebcf73f8546 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4418,29 +4418,11 @@ unlock:
return err;
}
-static u32 mlx5e_xdp_query(struct net_device *dev)
-{
- struct mlx5e_priv *priv = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
- u32 prog_id = 0;
-
- mutex_lock(&priv->state_lock);
- xdp_prog = priv->channels.params.xdp_prog;
- if (xdp_prog)
- prog_id = xdp_prog->aux->id;
- mutex_unlock(&priv->state_lock);
-
- return prog_id;
-}
-
static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return mlx5e_xdp_set(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = mlx5e_xdp_query(dev);
- return 0;
case XDP_SETUP_XSK_UMEM:
return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 44608873d3d9..39ee23e8c0bf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3614,10 +3614,6 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
return nfp_net_xdp_setup_drv(nn, xdp);
case XDP_SETUP_PROG_HW:
return nfp_net_xdp_setup_hw(nn, xdp);
- case XDP_QUERY_PROG:
- return xdp_attachment_query(&nn->xdp, xdp);
- case XDP_QUERY_PROG_HW:
- return xdp_attachment_query(&nn->xdp_hw, xdp);
default:
return nfp_app_bpf(nn->app, nn, xdp);
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index b7d0b6ccebd3..f961f65d9372 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1045,9 +1045,6 @@ int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return qede_xdp_set(edev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = edev->xdp_prog ? edev->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index f5aa1bd02f19..e06fa89f2d72 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -654,15 +654,10 @@ static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
struct efx_nic *efx = netdev_priv(dev);
- struct bpf_prog *xdp_prog;
switch (xdp->command) {
case XDP_SETUP_PROG:
return efx_xdp_setup_prog(efx, xdp->prog);
- case XDP_QUERY_PROG:
- xdp_prog = rtnl_dereference(efx->xdp_prog);
- xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 0f366cc50b74..25db667fa879 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1811,9 +1811,6 @@ static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return netsec_xdp_setup(priv, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index a399f3659346..d6d7a7d9c7ad 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1286,9 +1286,6 @@ int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
case XDP_SETUP_PROG:
return cpsw_xdp_prog_setup(priv, bpf);
- case XDP_QUERY_PROG:
- return xdp_attachment_query(&priv->xdpi, bpf);
-
default:
return -EINVAL;
}
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 8e4141552423..440486d9c999 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -163,16 +163,6 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
return ret;
}
-static u32 netvsc_xdp_query(struct netvsc_device *nvdev)
-{
- struct bpf_prog *prog = netvsc_xdp_get(nvdev);
-
- if (prog)
- return prog->aux->id;
-
- return 0;
-}
-
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
struct net_device_context *ndevctx = netdev_priv(dev);
@@ -182,12 +172,7 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
int ret;
if (!nvdev || nvdev->destroy) {
- if (bpf->command == XDP_QUERY_PROG) {
- bpf->prog_id = 0;
- return 0; /* Query must always succeed */
- } else {
- return -ENODEV;
- }
+ return -ENODEV;
}
switch (bpf->command) {
@@ -208,10 +193,6 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
return ret;
- case XDP_QUERY_PROG:
- bpf->prog_id = netvsc_xdp_query(nvdev);
- return 0;
-
default:
return -EINVAL;
}
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 0b362b8dac17..2e90512f3bbe 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -551,10 +551,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
ASSERT_RTNL();
switch (bpf->command) {
- case XDP_QUERY_PROG:
- return xdp_attachment_query(&ns->xdp, bpf);
- case XDP_QUERY_PROG_HW:
- return xdp_attachment_query(&ns->xdp_hw, bpf);
case XDP_SETUP_PROG:
err = nsim_setup_prog_checks(ns, bpf);
if (err)
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index d164052e0393..284f7092241d 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -121,7 +121,7 @@ static inline void nsim_bpf_uninit(struct netdevsim *ns)
static inline int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
- return bpf->command == XDP_QUERY_PROG ? 0 : -EOPNOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int nsim_bpf_disable_tc(struct netdevsim *ns)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9b4b25358f9b..3c11a77f5709 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1184,26 +1184,11 @@ static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
return 0;
}
-static u32 tun_xdp_query(struct net_device *dev)
-{
- struct tun_struct *tun = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
-
- xdp_prog = rtnl_dereference(tun->xdp_prog);
- if (xdp_prog)
- return xdp_prog->aux->id;
-
- return 0;
-}
-
static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return tun_xdp_set(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = tun_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b594f03eeddb..e56cd562a664 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1198,26 +1198,11 @@ err:
return err;
}
-static u32 veth_xdp_query(struct net_device *dev)
-{
- struct veth_priv *priv = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
-
- xdp_prog = priv->_xdp_prog;
- if (xdp_prog)
- return xdp_prog->aux->id;
-
- return 0;
-}
-
static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return veth_xdp_set(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = veth_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ba38765dc490..6fa8fe5ef160 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2490,28 +2490,11 @@ err:
return err;
}
-static u32 virtnet_xdp_query(struct net_device *dev)
-{
- struct virtnet_info *vi = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
- int i;
-
- for (i = 0; i < vi->max_queue_pairs; i++) {
- xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
- if (xdp_prog)
- return xdp_prog->aux->id;
- }
- return 0;
-}
-
static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = virtnet_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a63e550c370e..458be6882b98 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1480,32 +1480,11 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
return 0;
}
-static u32 xennet_xdp_query(struct net_device *dev)
-{
- unsigned int num_queues = dev->real_num_tx_queues;
- struct netfront_info *np = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
- struct netfront_queue *queue;
- unsigned int i;
-
- for (i = 0; i < num_queues; ++i) {
- queue = &np->queues[i];
- xdp_prog = rtnl_dereference(queue->xdp_prog);
- if (xdp_prog)
- return xdp_prog->aux->id;
- }
-
- return 0;
-}
-
static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return xennet_xdp_set(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = xennet_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index dba63b2429f0..ed8a6306990c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -98,7 +98,7 @@ static const struct proc_ops proc_net_seq_ops = {
.proc_release = seq_release_net,
};
-int bpf_iter_init_seq_net(void *priv_data)
+int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux)
{
#ifdef CONFIG_NET_NS
struct seq_net_private *p = priv_data;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2c6f26670acc..64f367044e25 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -46,7 +46,8 @@ struct bpf_cgroup_storage {
};
struct bpf_cgroup_storage_map *map;
struct bpf_cgroup_storage_key key;
- struct list_head list;
+ struct list_head list_map;
+ struct list_head list_cg;
struct rb_node node;
struct rcu_head rcu;
};
@@ -78,6 +79,9 @@ struct cgroup_bpf {
struct list_head progs[MAX_BPF_ATTACH_TYPE];
u32 flags[MAX_BPF_ATTACH_TYPE];
+ /* list of cgroup shared storages */
+ struct list_head storages;
+
/* temp storage for effective prog array used by prog_attach/detach */
struct bpf_prog_array *inactive;
@@ -161,6 +165,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
}
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+ void *key, bool locked);
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
enum bpf_cgroup_storage_type stype);
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
@@ -169,7 +176,6 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
enum bpf_attach_type type);
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
-void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map);
int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
@@ -383,8 +389,6 @@ static inline void bpf_cgroup_storage_set(
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
-static inline void bpf_cgroup_storage_release(struct bpf_prog_aux *aux,
- struct bpf_map *map) {}
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
static inline void bpf_cgroup_storage_free(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bae557ff2da8..cef4ef0d2b4e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -33,10 +33,21 @@ struct btf;
struct btf_type;
struct exception_table_entry;
struct seq_operations;
+struct bpf_iter_aux_info;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
+typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
+ struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+struct bpf_iter_seq_info {
+ const struct seq_operations *seq_ops;
+ bpf_iter_init_seq_priv_t init_seq_private;
+ bpf_iter_fini_seq_priv_t fini_seq_private;
+ u32 seq_priv_size;
+};
+
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
@@ -96,6 +107,9 @@ struct bpf_map_ops {
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
+
+ /* bpf_iter info used to open a seq_file */
+ const struct bpf_iter_seq_info *iter_seq_info;
};
struct bpf_map_memory {
@@ -342,6 +356,10 @@ enum bpf_reg_type {
PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */
+ PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */
+ PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
+ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
+ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
};
/* The information passed from prog-specific *_is_valid_access
@@ -683,6 +701,8 @@ struct bpf_prog_aux {
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
+ u32 max_rdonly_access;
+ u32 max_rdwr_access;
const struct bpf_ctx_arg_aux *ctx_arg_info;
struct bpf_prog *linked_prog;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
@@ -761,6 +781,33 @@ struct bpf_array_aux {
struct work_struct work;
};
+struct bpf_link {
+ atomic64_t refcnt;
+ u32 id;
+ enum bpf_link_type type;
+ const struct bpf_link_ops *ops;
+ struct bpf_prog *prog;
+ struct work_struct work;
+};
+
+struct bpf_link_ops {
+ void (*release)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
+ int (*detach)(struct bpf_link *link);
+ int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog);
+ void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
+ int (*fill_link_info)(const struct bpf_link *link,
+ struct bpf_link_info *info);
+};
+
+struct bpf_link_primer {
+ struct bpf_link *link;
+ struct file *file;
+ int fd;
+ u32 id;
+};
+
struct bpf_struct_ops_value;
struct btf_type;
struct btf_member;
@@ -1117,6 +1164,7 @@ int generic_map_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
+struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
extern int sysctl_unprivileged_bpf_disabled;
@@ -1143,32 +1191,6 @@ static inline bool bpf_bypass_spec_v4(void)
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
-struct bpf_link {
- atomic64_t refcnt;
- u32 id;
- enum bpf_link_type type;
- const struct bpf_link_ops *ops;
- struct bpf_prog *prog;
- struct work_struct work;
-};
-
-struct bpf_link_primer {
- struct bpf_link *link;
- struct file *file;
- int fd;
- u32 id;
-};
-
-struct bpf_link_ops {
- void (*release)(struct bpf_link *link);
- void (*dealloc)(struct bpf_link *link);
- int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
- struct bpf_prog *old_prog);
- void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
- int (*fill_link_info)(const struct bpf_link *link,
- struct bpf_link_info *info);
-};
-
void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog);
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
@@ -1188,18 +1210,21 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
extern int bpf_iter_ ## target(args); \
int __init bpf_iter_ ## target(args) { return 0; }
-typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
-typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+struct bpf_iter_aux_info {
+ struct bpf_map *map;
+};
+
+typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
+ struct bpf_iter_aux_info *aux);
#define BPF_ITER_CTX_ARG_MAX 2
struct bpf_iter_reg {
const char *target;
- const struct seq_operations *seq_ops;
- bpf_iter_init_seq_priv_t init_seq_private;
- bpf_iter_fini_seq_priv_t fini_seq_private;
- u32 seq_priv_size;
+ bpf_iter_check_target_t check_target;
u32 ctx_arg_info_size;
+ enum bpf_iter_link_info req_linfo;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
+ const struct bpf_iter_seq_info *seq_info;
};
struct bpf_iter_meta {
@@ -1208,6 +1233,13 @@ struct bpf_iter_meta {
u64 seq_num;
};
+struct bpf_iter__bpf_map_elem {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(void *, key);
+ __bpf_md_ptr(void *, value);
+};
+
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
@@ -1370,6 +1402,35 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
}
+static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops,
+ struct bpf_prog *prog)
+{
+}
+
+static inline int bpf_link_prime(struct bpf_link *link,
+ struct bpf_link_primer *primer)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int bpf_link_settle(struct bpf_link_primer *primer)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void bpf_link_cleanup(struct bpf_link_primer *primer)
+{
+}
+
+static inline void bpf_link_inc(struct bpf_link *link)
+{
+}
+
+static inline void bpf_link_put(struct bpf_link *link)
+{
+}
+
static inline int bpf_obj_get_user(const char __user *pathname, int flags)
{
return -EOPNOTSUPP;
@@ -1644,6 +1705,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
+extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
+extern const struct bpf_func_proto bpf_get_stack_proto_pe;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d07a6e973a7d..0a355b005bf4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -533,7 +533,8 @@ struct bpf_prog {
is_func:1, /* program is a bpf function */
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1, /* callchain buffer allocated? */
- enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
+ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
+ call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba0fa6b22787..88d40b9abaa1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -876,8 +876,6 @@ enum bpf_netdev_command {
*/
XDP_SETUP_PROG,
XDP_SETUP_PROG_HW,
- XDP_QUERY_PROG,
- XDP_QUERY_PROG_HW,
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
@@ -888,6 +886,19 @@ struct bpf_prog_offload_ops;
struct netlink_ext_ack;
struct xdp_umem;
struct xdp_dev_bulk_queue;
+struct bpf_xdp_link;
+
+enum bpf_xdp_mode {
+ XDP_MODE_SKB = 0,
+ XDP_MODE_DRV = 1,
+ XDP_MODE_HW = 2,
+ __MAX_XDP_MODE
+};
+
+struct bpf_xdp_entity {
+ struct bpf_prog *prog;
+ struct bpf_xdp_link *link;
+};
struct netdev_bpf {
enum bpf_netdev_command command;
@@ -898,12 +909,6 @@ struct netdev_bpf {
struct bpf_prog *prog;
struct netlink_ext_ack *extack;
};
- /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
- struct {
- u32 prog_id;
- /* flags with which program was installed */
- u32 prog_flags;
- };
/* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
struct {
struct bpf_offloaded_map *offmap;
@@ -2144,6 +2149,9 @@ struct net_device {
#endif
const struct udp_tunnel_nic_info *udp_tunnel_nic_info;
struct udp_tunnel_nic *udp_tunnel_nic;
+
+ /* protected by rtnl_lock */
+ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE];
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
@@ -3821,8 +3829,9 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, int expected_fd, u32 flags);
-u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
- enum bpf_netdev_command cmd);
+int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
+
int xdp_umem_query(struct net_device *dev, u16 queue_id);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d1eed1b43651..2df965cd0974 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -133,7 +133,8 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
-extern int bpf_iter_init_seq_net(void *priv_data);
+struct bpf_iter_aux_info;
+extern int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux);
extern void bpf_iter_fini_seq_net(void *priv_data);
#ifdef CONFIG_PROC_PID_ARCH_STATUS
diff --git a/include/net/xdp.h b/include/net/xdp.h
index dbe9c60797e1..3814fb631d52 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -240,8 +240,6 @@ struct xdp_attachment_info {
};
struct netdev_bpf;
-int xdp_attachment_query(struct xdp_attachment_info *info,
- struct netdev_bpf *bpf);
bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
struct netdev_bpf *bpf);
void xdp_attachment_setup(struct xdp_attachment_info *info,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 54d0c886e3ba..b134e679e9db 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_cmd {
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
+ BPF_LINK_DETACH,
};
enum bpf_map_type {
@@ -230,6 +231,7 @@ enum bpf_attach_type {
BPF_CGROUP_INET_SOCK_RELEASE,
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
+ BPF_XDP,
__MAX_BPF_ATTACH_TYPE
};
@@ -242,10 +244,18 @@ enum bpf_link_type {
BPF_LINK_TYPE_CGROUP = 3,
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
+ BPF_LINK_TYPE_XDP = 6,
MAX_BPF_LINK_TYPE,
};
+enum bpf_iter_link_info {
+ BPF_ITER_LINK_UNSPEC = 0,
+ BPF_ITER_LINK_MAP_FD = 1,
+
+ MAX_BPF_ITER_LINK_INFO,
+};
+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -607,7 +617,10 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
- __u32 target_fd; /* object to attach to */
+ union {
+ __u32 target_fd; /* object to attach to */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
} link_create;
@@ -622,6 +635,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct {
+ __u32 link_fd;
+ } link_detach;
+
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
@@ -3229,7 +3246,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
@@ -4057,6 +4074,9 @@ struct bpf_link_info {
__u32 netns_ino;
__u32 attach_type;
} netns;
+ struct {
+ __u32 ifindex;
+ } xdp;
};
} __attribute__((aligned(8)));
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 1131a921e1a6..e6eb9c0402da 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
obj-y := core.o
CFLAGS_core.o += $(call cc-disable-warning, override-init)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c66e8273fccd..8ff419b632a6 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -487,6 +487,142 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
vma->vm_pgoff + pgoff);
}
+struct bpf_iter_seq_array_map_info {
+ struct bpf_map *map;
+ void *percpu_value_buf;
+ u32 index;
+};
+
+static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_array_map_info *info = seq->private;
+ struct bpf_map *map = info->map;
+ struct bpf_array *array;
+ u32 index;
+
+ if (info->index >= map->max_entries)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ array = container_of(map, struct bpf_array, map);
+ index = info->index & array->index_mask;
+ if (info->percpu_value_buf)
+ return array->pptrs[index];
+ return array->value + array->elem_size * index;
+}
+
+static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_array_map_info *info = seq->private;
+ struct bpf_map *map = info->map;
+ struct bpf_array *array;
+ u32 index;
+
+ ++*pos;
+ ++info->index;
+ if (info->index >= map->max_entries)
+ return NULL;
+
+ array = container_of(map, struct bpf_array, map);
+ index = info->index & array->index_mask;
+ if (info->percpu_value_buf)
+ return array->pptrs[index];
+ return array->value + array->elem_size * index;
+}
+
+static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_array_map_info *info = seq->private;
+ struct bpf_iter__bpf_map_elem ctx = {};
+ struct bpf_map *map = info->map;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int off = 0, cpu = 0;
+ void __percpu **pptr;
+ u32 size;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, v == NULL);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (v) {
+ ctx.key = &info->index;
+
+ if (!info->percpu_value_buf) {
+ ctx.value = v;
+ } else {
+ pptr = v;
+ size = round_up(map->value_size, 8);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu),
+ size);
+ off += size;
+ }
+ ctx.value = info->percpu_value_buf;
+ }
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_array_map_seq_show(seq, v);
+}
+
+static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__bpf_array_map_seq_show(seq, NULL);
+}
+
+static int bpf_iter_init_array_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_array_map_info *seq_info = priv_data;
+ struct bpf_map *map = aux->map;
+ void *value_buf;
+ u32 buf_size;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+ buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+ if (!value_buf)
+ return -ENOMEM;
+
+ seq_info->percpu_value_buf = value_buf;
+ }
+
+ seq_info->map = map;
+ return 0;
+}
+
+static void bpf_iter_fini_array_map(void *priv_data)
+{
+ struct bpf_iter_seq_array_map_info *seq_info = priv_data;
+
+ kfree(seq_info->percpu_value_buf);
+}
+
+static const struct seq_operations bpf_array_map_seq_ops = {
+ .start = bpf_array_map_seq_start,
+ .next = bpf_array_map_seq_next,
+ .stop = bpf_array_map_seq_stop,
+ .show = bpf_array_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_array_map_seq_ops,
+ .init_seq_private = bpf_iter_init_array_map,
+ .fini_seq_private = bpf_iter_fini_array_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
+};
+
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
@@ -506,6 +642,7 @@ const struct bpf_map_ops array_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int percpu_array_map_btf_id;
@@ -521,6 +658,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int fd_array_map_alloc_check(union bpf_attr *attr)
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index dd612b80b9fe..363b9cafc2d8 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -14,11 +14,13 @@ struct bpf_iter_target_info {
struct bpf_iter_link {
struct bpf_link link;
+ struct bpf_iter_aux_info aux;
struct bpf_iter_target_info *tinfo;
};
struct bpf_iter_priv_data {
struct bpf_iter_target_info *tinfo;
+ const struct bpf_iter_seq_info *seq_info;
struct bpf_prog *prog;
u64 session_id;
u64 seq_num;
@@ -35,7 +37,8 @@ static DEFINE_MUTEX(link_mutex);
/* incremented on every opened seq_file */
static atomic64_t session_id;
-static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
+ const struct bpf_iter_seq_info *seq_info);
static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
@@ -199,11 +202,25 @@ done:
return copied;
}
+static const struct bpf_iter_seq_info *
+__get_seq_info(struct bpf_iter_link *link)
+{
+ const struct bpf_iter_seq_info *seq_info;
+
+ if (link->aux.map) {
+ seq_info = link->aux.map->ops->iter_seq_info;
+ if (seq_info)
+ return seq_info;
+ }
+
+ return link->tinfo->reg_info->seq_info;
+}
+
static int iter_open(struct inode *inode, struct file *file)
{
struct bpf_iter_link *link = inode->i_private;
- return prepare_seq_file(file, link);
+ return prepare_seq_file(file, link, __get_seq_info(link));
}
static int iter_release(struct inode *inode, struct file *file)
@@ -218,8 +235,8 @@ static int iter_release(struct inode *inode, struct file *file)
iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
target_private);
- if (iter_priv->tinfo->reg_info->fini_seq_private)
- iter_priv->tinfo->reg_info->fini_seq_private(seq->private);
+ if (iter_priv->seq_info->fini_seq_private)
+ iter_priv->seq_info->fini_seq_private(seq->private);
bpf_prog_put(iter_priv->prog);
seq->private = iter_priv;
@@ -318,6 +335,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
static void bpf_iter_link_release(struct bpf_link *link)
{
+ struct bpf_iter_link *iter_link =
+ container_of(link, struct bpf_iter_link, link);
+
+ if (iter_link->aux.map)
+ bpf_map_put_with_uref(iter_link->aux.map);
}
static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -370,14 +392,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_link_primer link_primer;
struct bpf_iter_target_info *tinfo;
+ struct bpf_iter_aux_info aux = {};
struct bpf_iter_link *link;
+ u32 prog_btf_id, target_fd;
bool existed = false;
- u32 prog_btf_id;
+ struct bpf_map *map;
int err;
- if (attr->link_create.target_fd || attr->link_create.flags)
- return -EINVAL;
-
prog_btf_id = prog->aux->attach_btf_id;
mutex_lock(&targets_mutex);
list_for_each_entry(tinfo, &targets, list) {
@@ -390,6 +411,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (!existed)
return -ENOENT;
+ /* Make sure user supplied flags are target expected. */
+ target_fd = attr->link_create.target_fd;
+ if (attr->link_create.flags != tinfo->reg_info->req_linfo)
+ return -EINVAL;
+ if (!attr->link_create.flags && target_fd)
+ return -EINVAL;
+
link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
if (!link)
return -ENOMEM;
@@ -403,21 +431,45 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
+ map = bpf_map_get_with_uref(target_fd);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto cleanup_link;
+ }
+
+ aux.map = map;
+ err = tinfo->reg_info->check_target(prog, &aux);
+ if (err) {
+ bpf_map_put_with_uref(map);
+ goto cleanup_link;
+ }
+
+ link->aux.map = map;
+ }
+
return bpf_link_settle(&link_primer);
+
+cleanup_link:
+ bpf_link_cleanup(&link_primer);
+ return err;
}
static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
struct bpf_iter_target_info *tinfo,
+ const struct bpf_iter_seq_info *seq_info,
struct bpf_prog *prog)
{
priv_data->tinfo = tinfo;
+ priv_data->seq_info = seq_info;
priv_data->prog = prog;
priv_data->session_id = atomic64_inc_return(&session_id);
priv_data->seq_num = 0;
priv_data->done_stop = false;
}
-static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
+ const struct bpf_iter_seq_info *seq_info)
{
struct bpf_iter_priv_data *priv_data;
struct bpf_iter_target_info *tinfo;
@@ -433,21 +485,21 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
tinfo = link->tinfo;
total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
- tinfo->reg_info->seq_priv_size;
- priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
+ seq_info->seq_priv_size;
+ priv_data = __seq_open_private(file, seq_info->seq_ops,
total_priv_dsize);
if (!priv_data) {
err = -ENOMEM;
goto release_prog;
}
- if (tinfo->reg_info->init_seq_private) {
- err = tinfo->reg_info->init_seq_private(priv_data->target_private);
+ if (seq_info->init_seq_private) {
+ err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
if (err)
goto release_seq_file;
}
- init_seq_meta(priv_data, tinfo, prog);
+ init_seq_meta(priv_data, tinfo, seq_info, prog);
seq = file->private_data;
seq->private = priv_data->target_private;
@@ -463,6 +515,7 @@ release_prog:
int bpf_iter_new_fd(struct bpf_link *link)
{
+ struct bpf_iter_link *iter_link;
struct file *file;
unsigned int flags;
int err, fd;
@@ -481,8 +534,8 @@ int bpf_iter_new_fd(struct bpf_link *link)
goto free_fd;
}
- err = prepare_seq_file(file,
- container_of(link, struct bpf_iter_link, link));
+ iter_link = container_of(link, struct bpf_iter_link, link);
+ err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
if (err)
goto free_file;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4ff0de860ab7..91afdd4c82e3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3806,6 +3806,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
+
+ /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+ const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+
+ if (ctx_arg_info->offset == off &&
+ (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
+ ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
+ info->reg_type = ctx_arg_info->reg_type;
+ return true;
+ }
+ }
+
if (t->type == 0)
/* This is a pointer to void.
* It is the same as scalar from the verifier safety pov.
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ac53102e244a..83ff127ef7ae 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -37,17 +37,34 @@ static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
}
static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
- struct bpf_prog *prog)
+ struct bpf_cgroup_storage *new_storages[],
+ enum bpf_attach_type type,
+ struct bpf_prog *prog,
+ struct cgroup *cgrp)
{
enum bpf_cgroup_storage_type stype;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_map *map;
+
+ key.cgroup_inode_id = cgroup_id(cgrp);
+ key.attach_type = type;
for_each_cgroup_storage_type(stype) {
+ map = prog->aux->cgroup_storage[stype];
+ if (!map)
+ continue;
+
+ storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
+ if (storages[stype])
+ continue;
+
storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
if (IS_ERR(storages[stype])) {
- storages[stype] = NULL;
- bpf_cgroup_storages_free(storages);
+ bpf_cgroup_storages_free(new_storages);
return -ENOMEM;
}
+
+ new_storages[stype] = storages[stype];
}
return 0;
@@ -63,7 +80,7 @@ static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
}
static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
- struct cgroup* cgrp,
+ struct cgroup *cgrp,
enum bpf_attach_type attach_type)
{
enum bpf_cgroup_storage_type stype;
@@ -72,14 +89,6 @@ static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
}
-static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
-{
- enum bpf_cgroup_storage_type stype;
-
- for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_unlink(storages[stype]);
-}
-
/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
* It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
* doesn't free link memory, which will eventually be done by bpf_link's
@@ -101,22 +110,23 @@ static void cgroup_bpf_release(struct work_struct *work)
struct cgroup *p, *cgrp = container_of(work, struct cgroup,
bpf.release_work);
struct bpf_prog_array *old_array;
+ struct list_head *storages = &cgrp->bpf.storages;
+ struct bpf_cgroup_storage *storage, *stmp;
+
unsigned int type;
mutex_lock(&cgroup_mutex);
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
struct list_head *progs = &cgrp->bpf.progs[type];
- struct bpf_prog_list *pl, *tmp;
+ struct bpf_prog_list *pl, *pltmp;
- list_for_each_entry_safe(pl, tmp, progs, node) {
+ list_for_each_entry_safe(pl, pltmp, progs, node) {
list_del(&pl->node);
if (pl->prog)
bpf_prog_put(pl->prog);
if (pl->link)
bpf_cgroup_link_auto_detach(pl->link);
- bpf_cgroup_storages_unlink(pl->storage);
- bpf_cgroup_storages_free(pl->storage);
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
@@ -126,6 +136,11 @@ static void cgroup_bpf_release(struct work_struct *work)
bpf_prog_array_free(old_array);
}
+ list_for_each_entry_safe(storage, stmp, storages, list_cg) {
+ bpf_cgroup_storage_unlink(storage);
+ bpf_cgroup_storage_free(storage);
+ }
+
mutex_unlock(&cgroup_mutex);
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
@@ -290,6 +305,8 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+ INIT_LIST_HEAD(&cgrp->bpf.storages);
+
for (i = 0; i < NR; i++)
if (compute_effective_progs(cgrp, i, &arrays[i]))
goto cleanup;
@@ -422,7 +439,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
- struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_prog_list *pl;
int err;
@@ -455,17 +472,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
if (IS_ERR(pl))
return PTR_ERR(pl);
- if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog))
+ if (bpf_cgroup_storages_alloc(storage, new_storage, type,
+ prog ? : link->link.prog, cgrp))
return -ENOMEM;
if (pl) {
old_prog = pl->prog;
- bpf_cgroup_storages_unlink(pl->storage);
- bpf_cgroup_storages_assign(old_storage, pl->storage);
} else {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
- bpf_cgroup_storages_free(storage);
+ bpf_cgroup_storages_free(new_storage);
return -ENOMEM;
}
list_add_tail(&pl->node, progs);
@@ -480,12 +496,11 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
if (err)
goto cleanup;
- bpf_cgroup_storages_free(old_storage);
if (old_prog)
bpf_prog_put(old_prog);
else
static_branch_inc(&cgroup_bpf_enabled_key);
- bpf_cgroup_storages_link(pl->storage, cgrp, type);
+ bpf_cgroup_storages_link(new_storage, cgrp, type);
return 0;
cleanup:
@@ -493,9 +508,7 @@ cleanup:
pl->prog = old_prog;
pl->link = NULL;
}
- bpf_cgroup_storages_free(pl->storage);
- bpf_cgroup_storages_assign(pl->storage, old_storage);
- bpf_cgroup_storages_link(pl->storage, cgrp, type);
+ bpf_cgroup_storages_free(new_storage);
if (!old_prog) {
list_del(&pl->node);
kfree(pl);
@@ -679,8 +692,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
- bpf_cgroup_storages_unlink(pl->storage);
- bpf_cgroup_storages_free(pl->storage);
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
@@ -803,6 +814,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
{
struct bpf_cgroup_link *cg_link =
container_of(link, struct bpf_cgroup_link, link);
+ struct cgroup *cg;
/* link might have been auto-detached by dying cgroup already,
* in that case our work is done here
@@ -821,8 +833,12 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
cg_link->type));
+ cg = cg_link->cgroup;
+ cg_link->cgroup = NULL;
+
mutex_unlock(&cgroup_mutex);
- cgroup_put(cg_link->cgroup);
+
+ cgroup_put(cg);
}
static void bpf_cgroup_link_dealloc(struct bpf_link *link)
@@ -833,6 +849,13 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link)
kfree(cg_link);
}
+static int bpf_cgroup_link_detach(struct bpf_link *link)
+{
+ bpf_cgroup_link_release(link);
+
+ return 0;
+}
+
static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
@@ -872,6 +895,7 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_cgroup_link_lops = {
.release = bpf_cgroup_link_release,
.dealloc = bpf_cgroup_link_dealloc,
+ .detach = bpf_cgroup_link_detach,
.update_prog = cgroup_bpf_replace,
.show_fdinfo = bpf_cgroup_link_show_fdinfo,
.fill_link_info = bpf_cgroup_link_fill_link_info,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7be02e555ab9..bde93344164d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2097,24 +2097,12 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
: 0;
}
-static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux)
-{
- enum bpf_cgroup_storage_type stype;
-
- for_each_cgroup_storage_type(stype) {
- if (!aux->cgroup_storage[stype])
- continue;
- bpf_cgroup_storage_release(aux, aux->cgroup_storage[stype]);
- }
-}
-
void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len)
{
struct bpf_map *map;
u32 i;
- bpf_free_cgroup_storage(aux);
for (i = 0; i < len; i++) {
map = used_maps[i];
if (map->ops->map_poke_untrack)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7b136953f214..78dfff6a501b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1618,6 +1618,196 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
true, false);
}
+struct bpf_iter_seq_hash_map_info {
+ struct bpf_map *map;
+ struct bpf_htab *htab;
+ void *percpu_value_buf; // non-zero means percpu hash
+ unsigned long flags;
+ u32 bucket_id;
+ u32 skip_elems;
+};
+
+static struct htab_elem *
+bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
+ struct htab_elem *prev_elem)
+{
+ const struct bpf_htab *htab = info->htab;
+ unsigned long flags = info->flags;
+ u32 skip_elems = info->skip_elems;
+ u32 bucket_id = info->bucket_id;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
+ struct htab_elem *elem;
+ struct bucket *b;
+ u32 i, count;
+
+ if (bucket_id >= htab->n_buckets)
+ return NULL;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ /* no update/deletion on this bucket, prev_elem should be still valid
+ * and we won't skip elements.
+ */
+ n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
+ elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
+ if (elem)
+ return elem;
+
+ /* not found, unlock and go to the next bucket */
+ b = &htab->buckets[bucket_id++];
+ htab_unlock_bucket(htab, b, flags);
+ skip_elems = 0;
+ }
+
+ for (i = bucket_id; i < htab->n_buckets; i++) {
+ b = &htab->buckets[i];
+ flags = htab_lock_bucket(htab, b);
+
+ count = 0;
+ head = &b->head;
+ hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+ if (count >= skip_elems) {
+ info->flags = flags;
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return elem;
+ }
+ count++;
+ }
+
+ htab_unlock_bucket(htab, b, flags);
+ skip_elems = 0;
+ }
+
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+}
+
+static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ struct htab_elem *elem;
+
+ elem = bpf_hash_map_seq_find_next(info, NULL);
+ if (!elem)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return elem;
+}
+
+static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ ++*pos;
+ ++info->skip_elems;
+ return bpf_hash_map_seq_find_next(info, v);
+}
+
+static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ u32 roundup_key_size, roundup_value_size;
+ struct bpf_iter__bpf_map_elem ctx = {};
+ struct bpf_map *map = info->map;
+ struct bpf_iter_meta meta;
+ int ret = 0, off = 0, cpu;
+ struct bpf_prog *prog;
+ void __percpu *pptr;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, elem == NULL);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ roundup_key_size = round_up(map->key_size, 8);
+ ctx.key = elem->key;
+ if (!info->percpu_value_buf) {
+ ctx.value = elem->key + roundup_key_size;
+ } else {
+ roundup_value_size = round_up(map->value_size, 8);
+ pptr = htab_elem_get_ptr(elem, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu),
+ roundup_value_size);
+ off += roundup_value_size;
+ }
+ ctx.value = info->percpu_value_buf;
+ }
+ }
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+}
+
+static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_hash_map_seq_show(seq, v);
+}
+
+static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ if (!v)
+ (void)__bpf_hash_map_seq_show(seq, NULL);
+ else
+ htab_unlock_bucket(info->htab,
+ &info->htab->buckets[info->bucket_id],
+ info->flags);
+}
+
+static int bpf_iter_init_hash_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+ struct bpf_map *map = aux->map;
+ void *value_buf;
+ u32 buf_size;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+ if (!value_buf)
+ return -ENOMEM;
+
+ seq_info->percpu_value_buf = value_buf;
+ }
+
+ seq_info->map = map;
+ seq_info->htab = container_of(map, struct bpf_htab, map);
+ return 0;
+}
+
+static void bpf_iter_fini_hash_map(void *priv_data)
+{
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+
+ kfree(seq_info->percpu_value_buf);
+}
+
+static const struct seq_operations bpf_hash_map_seq_ops = {
+ .start = bpf_hash_map_seq_start,
+ .next = bpf_hash_map_seq_next,
+ .stop = bpf_hash_map_seq_stop,
+ .show = bpf_hash_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_hash_map_seq_ops,
+ .init_seq_private = bpf_iter_init_hash_map,
+ .fini_seq_private = bpf_iter_fini_hash_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
+};
+
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_alloc_check = htab_map_alloc_check,
@@ -1632,6 +1822,7 @@ const struct bpf_map_ops htab_map_ops = {
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int htab_lru_map_btf_id;
@@ -1649,6 +1840,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
/* Called from eBPF program */
@@ -1766,6 +1958,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int htab_lru_percpu_map_btf_id;
@@ -1781,6 +1974,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int fd_htab_map_alloc_check(union bpf_attr *attr)
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 51bd5a8cb01b..571bb351ed3b 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -13,6 +13,8 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO
#ifdef CONFIG_CGROUP_BPF
+#include "../cgroup/cgroup-internal.h"
+
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
@@ -20,7 +22,6 @@ struct bpf_cgroup_storage_map {
struct bpf_map map;
spinlock_t lock;
- struct bpf_prog_aux *aux;
struct rb_root root;
struct list_head list;
};
@@ -30,24 +31,41 @@ static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
return container_of(map, struct bpf_cgroup_storage_map, map);
}
-static int bpf_cgroup_storage_key_cmp(
- const struct bpf_cgroup_storage_key *key1,
- const struct bpf_cgroup_storage_key *key2)
+static bool attach_type_isolated(const struct bpf_map *map)
{
- if (key1->cgroup_inode_id < key2->cgroup_inode_id)
- return -1;
- else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
- return 1;
- else if (key1->attach_type < key2->attach_type)
- return -1;
- else if (key1->attach_type > key2->attach_type)
- return 1;
+ return map->key_size == sizeof(struct bpf_cgroup_storage_key);
+}
+
+static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map,
+ const void *_key1, const void *_key2)
+{
+ if (attach_type_isolated(&map->map)) {
+ const struct bpf_cgroup_storage_key *key1 = _key1;
+ const struct bpf_cgroup_storage_key *key2 = _key2;
+
+ if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+ return -1;
+ else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+ return 1;
+ else if (key1->attach_type < key2->attach_type)
+ return -1;
+ else if (key1->attach_type > key2->attach_type)
+ return 1;
+ } else {
+ const __u64 *cgroup_inode_id1 = _key1;
+ const __u64 *cgroup_inode_id2 = _key2;
+
+ if (*cgroup_inode_id1 < *cgroup_inode_id2)
+ return -1;
+ else if (*cgroup_inode_id1 > *cgroup_inode_id2)
+ return 1;
+ }
return 0;
}
-static struct bpf_cgroup_storage *cgroup_storage_lookup(
- struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
- bool locked)
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+ void *key, bool locked)
{
struct rb_root *root = &map->root;
struct rb_node *node;
@@ -61,7 +79,7 @@ static struct bpf_cgroup_storage *cgroup_storage_lookup(
storage = container_of(node, struct bpf_cgroup_storage, node);
- switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+ switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) {
case -1:
node = node->rb_left;
break;
@@ -93,7 +111,7 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
this = container_of(*new, struct bpf_cgroup_storage, node);
parent = *new;
- switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+ switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) {
case -1:
new = &((*new)->rb_left);
break;
@@ -111,10 +129,9 @@ static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
return 0;
}
-static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
storage = cgroup_storage_lookup(map, key, false);
@@ -124,17 +141,13 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
return &READ_ONCE(storage->buf)->data[0];
}
-static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;
- if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
- return -EINVAL;
-
- if (unlikely(flags & BPF_NOEXIST))
+ if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST)))
return -EINVAL;
if (unlikely((flags & BPF_F_LOCK) &&
@@ -167,11 +180,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
return 0;
}
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
void *value)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
int cpu, off = 0;
u32 size;
@@ -197,11 +209,10 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
return 0;
}
-int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
void *value, u64 map_flags)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
int cpu, off = 0;
u32 size;
@@ -232,12 +243,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
return 0;
}
-static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
void *_next_key)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- struct bpf_cgroup_storage_key *key = _key;
- struct bpf_cgroup_storage_key *next = _next_key;
struct bpf_cgroup_storage *storage;
spin_lock_bh(&map->lock);
@@ -250,17 +259,23 @@ static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
if (!storage)
goto enoent;
- storage = list_next_entry(storage, list);
+ storage = list_next_entry(storage, list_map);
if (!storage)
goto enoent;
} else {
storage = list_first_entry(&map->list,
- struct bpf_cgroup_storage, list);
+ struct bpf_cgroup_storage, list_map);
}
spin_unlock_bh(&map->lock);
- next->attach_type = storage->key.attach_type;
- next->cgroup_inode_id = storage->key.cgroup_inode_id;
+
+ if (attach_type_isolated(&map->map)) {
+ struct bpf_cgroup_storage_key *next = _next_key;
+ *next = storage->key;
+ } else {
+ __u64 *next = _next_key;
+ *next = storage->key.cgroup_inode_id;
+ }
return 0;
enoent:
@@ -275,7 +290,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
struct bpf_map_memory mem;
int ret;
- if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+ if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
+ attr->key_size != sizeof(__u64))
return ERR_PTR(-EINVAL);
if (attr->value_size == 0)
@@ -318,6 +334,17 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
static void cgroup_storage_map_free(struct bpf_map *_map)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+ struct list_head *storages = &map->list;
+ struct bpf_cgroup_storage *storage, *stmp;
+
+ mutex_lock(&cgroup_mutex);
+
+ list_for_each_entry_safe(storage, stmp, storages, list_map) {
+ bpf_cgroup_storage_unlink(storage);
+ bpf_cgroup_storage_free(storage);
+ }
+
+ mutex_unlock(&cgroup_mutex);
WARN_ON(!RB_EMPTY_ROOT(&map->root));
WARN_ON(!list_empty(&map->list));
@@ -335,49 +362,63 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- struct btf_member *m;
- u32 offset, size;
-
- /* Key is expected to be of struct bpf_cgroup_storage_key type,
- * which is:
- * struct bpf_cgroup_storage_key {
- * __u64 cgroup_inode_id;
- * __u32 attach_type;
- * };
- */
+ if (attach_type_isolated(map)) {
+ struct btf_member *m;
+ u32 offset, size;
+
+ /* Key is expected to be of struct bpf_cgroup_storage_key type,
+ * which is:
+ * struct bpf_cgroup_storage_key {
+ * __u64 cgroup_inode_id;
+ * __u32 attach_type;
+ * };
+ */
+
+ /*
+ * Key_type must be a structure with two fields.
+ */
+ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
+ BTF_INFO_VLEN(key_type->info) != 2)
+ return -EINVAL;
+
+ /*
+ * The first field must be a 64 bit integer at 0 offset.
+ */
+ m = (struct btf_member *)(key_type + 1);
+ size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
+ if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
+ return -EINVAL;
+
+ /*
+ * The second field must be a 32 bit integer at 64 bit offset.
+ */
+ m++;
+ offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
+ size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
+ if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
+ return -EINVAL;
+ } else {
+ u32 int_data;
- /*
- * Key_type must be a structure with two fields.
- */
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
- BTF_INFO_VLEN(key_type->info) != 2)
- return -EINVAL;
+ /*
+ * Key is expected to be u64, which stores the cgroup_inode_id
+ */
- /*
- * The first field must be a 64 bit integer at 0 offset.
- */
- m = (struct btf_member *)(key_type + 1);
- size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
- if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
- return -EINVAL;
+ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+ return -EINVAL;
- /*
- * The second field must be a 32 bit integer at 64 bit offset.
- */
- m++;
- offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
- size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
- if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
- return -EINVAL;
+ int_data = *(u32 *)(key_type + 1);
+ if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
+ return -EINVAL;
+ }
return 0;
}
-static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
+static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
- struct bpf_cgroup_storage_key *key = _key;
struct bpf_cgroup_storage *storage;
int cpu;
@@ -426,38 +467,13 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
- struct bpf_cgroup_storage_map *map = map_to_storage(_map);
- int ret = -EBUSY;
-
- spin_lock_bh(&map->lock);
- if (map->aux && map->aux != aux)
- goto unlock;
if (aux->cgroup_storage[stype] &&
aux->cgroup_storage[stype] != _map)
- goto unlock;
+ return -EBUSY;
- map->aux = aux;
aux->cgroup_storage[stype] = _map;
- ret = 0;
-unlock:
- spin_unlock_bh(&map->lock);
-
- return ret;
-}
-
-void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *_map)
-{
- enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
- struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-
- spin_lock_bh(&map->lock);
- if (map->aux == aux) {
- WARN_ON(aux->cgroup_storage[stype] != _map);
- map->aux = NULL;
- aux->cgroup_storage[stype] = NULL;
- }
- spin_unlock_bh(&map->lock);
+ return 0;
}
static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
@@ -578,7 +594,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
spin_lock_bh(&map->lock);
WARN_ON(cgroup_storage_insert(map, storage));
- list_add(&storage->list, &map->list);
+ list_add(&storage->list_map, &map->list);
+ list_add(&storage->list_cg, &cgroup->bpf.storages);
spin_unlock_bh(&map->lock);
}
@@ -596,7 +613,8 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
root = &map->root;
rb_erase(&storage->node, root);
- list_del(&storage->list);
+ list_del(&storage->list_map);
+ list_del(&storage->list_cg);
spin_unlock_bh(&map->lock);
}
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index 8a7af11b411f..fbe1f557cb88 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -7,7 +7,7 @@
#include <linux/btf_ids.h>
struct bpf_iter_seq_map_info {
- u32 mid;
+ u32 map_id;
};
static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
@@ -15,27 +15,23 @@ static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
struct bpf_iter_seq_map_info *info = seq->private;
struct bpf_map *map;
- map = bpf_map_get_curr_or_next(&info->mid);
+ map = bpf_map_get_curr_or_next(&info->map_id);
if (!map)
return NULL;
- ++*pos;
+ if (*pos == 0)
+ ++*pos;
return map;
}
static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct bpf_iter_seq_map_info *info = seq->private;
- struct bpf_map *map;
++*pos;
- ++info->mid;
+ ++info->map_id;
bpf_map_put((struct bpf_map *)v);
- map = bpf_map_get_curr_or_next(&info->mid);
- if (!map)
- return NULL;
-
- return map;
+ return bpf_map_get_curr_or_next(&info->map_id);
}
struct bpf_iter__bpf_map {
@@ -85,23 +81,79 @@ static const struct seq_operations bpf_map_seq_ops = {
BTF_ID_LIST(btf_bpf_map_id)
BTF_ID(struct, bpf_map)
-static struct bpf_iter_reg bpf_map_reg_info = {
- .target = "bpf_map",
+static const struct bpf_iter_seq_info bpf_map_seq_info = {
.seq_ops = &bpf_map_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = sizeof(struct bpf_iter_seq_map_info),
+};
+
+static struct bpf_iter_reg bpf_map_reg_info = {
+ .target = "bpf_map",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map, map),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &bpf_map_seq_info,
+};
+
+static int bpf_iter_check_map(struct bpf_prog *prog,
+ struct bpf_iter_aux_info *aux)
+{
+ u32 key_acc_size, value_acc_size, key_size, value_size;
+ struct bpf_map *map = aux->map;
+ bool is_percpu = false;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+ is_percpu = true;
+ else if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY)
+ return -EINVAL;
+
+ key_acc_size = prog->aux->max_rdonly_access;
+ value_acc_size = prog->aux->max_rdwr_access;
+ key_size = map->key_size;
+ if (!is_percpu)
+ value_size = map->value_size;
+ else
+ value_size = round_up(map->value_size, 8) * num_possible_cpus();
+
+ if (key_acc_size > key_size || value_acc_size > value_size)
+ return -EACCES;
+
+ return 0;
+}
+
+DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
+ struct bpf_map *map, void *key, void *value)
+
+static const struct bpf_iter_reg bpf_map_elem_reg_info = {
+ .target = "bpf_map_elem",
+ .check_target = bpf_iter_check_map,
+ .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_map_elem, key),
+ PTR_TO_RDONLY_BUF_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_map_elem, value),
+ PTR_TO_RDWR_BUF_OR_NULL },
+ },
};
static int __init bpf_map_iter_init(void)
{
+ int ret;
+
bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id;
- return bpf_iter_reg_target(&bpf_map_reg_info);
+ ret = bpf_iter_reg_target(&bpf_map_reg_info);
+ if (ret)
+ return ret;
+
+ return bpf_iter_reg_target(&bpf_map_elem_reg_info);
}
late_initcall(bpf_map_iter_init);
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index 71405edd667c..542f275bf252 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -142,9 +142,16 @@ static void bpf_netns_link_release(struct bpf_link *link)
bpf_prog_array_free(old_array);
out_unlock:
+ net_link->net = NULL;
mutex_unlock(&netns_bpf_mutex);
}
+static int bpf_netns_link_detach(struct bpf_link *link)
+{
+ bpf_netns_link_release(link);
+ return 0;
+}
+
static void bpf_netns_link_dealloc(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
@@ -228,6 +235,7 @@ static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
static const struct bpf_link_ops bpf_netns_link_ops = {
.release = bpf_netns_link_release,
.dealloc = bpf_netns_link_dealloc,
+ .detach = bpf_netns_link_detach,
.update_prog = bpf_netns_link_update_prog,
.fill_link_info = bpf_netns_link_fill_info,
.show_fdinfo = bpf_netns_link_show_fdinfo,
diff --git a/kernel/bpf/prog_iter.c b/kernel/bpf/prog_iter.c
new file mode 100644
index 000000000000..53a73c841c13
--- /dev/null
+++ b/kernel/bpf/prog_iter.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <linux/btf_ids.h>
+
+struct bpf_iter_seq_prog_info {
+ u32 prog_id;
+};
+
+/* seq_file ->start(): resume from the prog_id saved in the private state and
+ * return whatever bpf_prog_get_curr_or_next() yields for it (NULL when there
+ * is nothing left).
+ */
+static void *bpf_prog_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_prog_info *priv = seq->private;
+	struct bpf_prog *cur;
+
+	cur = bpf_prog_get_curr_or_next(&priv->prog_id);
+
+	/* Only a successful start at position 0 moves the position forward. */
+	if (cur && *pos == 0)
+		++*pos;
+
+	return cur;
+}
+
+/* seq_file ->next(): step past the program that was just shown, drop the
+ * reference held on it, and look up the following one.
+ */
+static void *bpf_prog_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_prog_info *priv = seq->private;
+	struct bpf_prog *shown = v;
+
+	*pos += 1;
+	priv->prog_id += 1;
+	bpf_prog_put(shown);
+
+	return bpf_prog_get_curr_or_next(&priv->prog_id);
+}
+
+struct bpf_iter__bpf_prog {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_prog *, prog);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_prog, struct bpf_iter_meta *meta, struct bpf_prog *prog)
+
+/* Build the iterator context for @v and run the attached BPF program, if
+ * bpf_iter_get_info() provides one. Returns the program's result, or 0 when
+ * there is no program to run.
+ */
+static int __bpf_prog_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_iter__bpf_prog ctx;
+	struct bpf_prog *iter_prog;
+
+	meta.seq = seq;
+	ctx.meta = &meta;
+	ctx.prog = v;
+
+	iter_prog = bpf_iter_get_info(&meta, in_stop);
+	if (!iter_prog)
+		return 0;
+
+	return bpf_iter_run_prog(iter_prog, &ctx);
+}
+
+/* seq_file ->show(): run the iterator program on @v with in_stop == false. */
+static int bpf_prog_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_prog_seq_show(seq, v, false);
+}
+
+/* seq_file ->stop(): with a live element, just drop its reference; with no
+ * element, give the iterator program one last call (in_stop == true, NULL
+ * prog pointer in the context).
+ */
+static void bpf_prog_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v) {
+		bpf_prog_put((struct bpf_prog *)v);
+		return;
+	}
+
+	(void)__bpf_prog_seq_show(seq, NULL, true);
+}
+
+static const struct seq_operations bpf_prog_seq_ops = {
+ .start = bpf_prog_seq_start,
+ .next = bpf_prog_seq_next,
+ .stop = bpf_prog_seq_stop,
+ .show = bpf_prog_seq_show,
+};
+
+BTF_ID_LIST(btf_bpf_prog_id)
+BTF_ID(struct, bpf_prog)
+
+static const struct bpf_iter_seq_info bpf_prog_seq_info = {
+ .seq_ops = &bpf_prog_seq_ops,
+ .init_seq_private = NULL,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_prog_info),
+};
+
+static struct bpf_iter_reg bpf_prog_reg_info = {
+ .target = "bpf_prog",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_prog, prog),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &bpf_prog_seq_info,
+};
+
+/* Late initcall: fill in the BTF id of struct bpf_prog for the single context
+ * argument of the "bpf_prog" target, then register the target.
+ */
+static int __init bpf_prog_iter_init(void)
+{
+ bpf_prog_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_prog_id;
+ return bpf_iter_reg_target(&bpf_prog_reg_info);
+}
+
+late_initcall(bpf_prog_iter_init);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 48d8e739975f..4fd830a62be2 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/elf.h>
@@ -387,11 +388,10 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
#endif
}
-BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
- u64, flags)
+static long __bpf_get_stackid(struct bpf_map *map,
+ struct perf_callchain_entry *trace, u64 flags)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
- struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 max_depth = map->value_size / stack_map_data_size(map);
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
@@ -399,21 +399,9 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
- bool kernel = !user;
u64 *ips;
bool hash_matches;
- if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
- BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
- return -EINVAL;
-
- trace = get_perf_callchain(regs, init_nr, kernel, user,
- sysctl_perf_event_max_stack, false, false);
-
- if (unlikely(!trace))
- /* couldn't fetch the stack trace */
- return -EFAULT;
-
/* get_perf_callchain() guarantees that trace->nr >= init_nr
* and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
*/
@@ -478,6 +466,30 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
return id;
}
+/* bpf_get_stackid() helper: validate @flags, capture a callchain for @regs
+ * with get_perf_callchain() and hand it to __bpf_get_stackid().
+ *
+ * Returns -EINVAL when @flags has bits outside the supported set, -EFAULT
+ * when no callchain could be fetched, otherwise whatever
+ * __bpf_get_stackid() returns.
+ */
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
+{
+ u32 max_depth = map->value_size / stack_map_data_size(map);
+ /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+ u32 init_nr = sysctl_perf_event_max_stack - max_depth;
+ bool user = flags & BPF_F_USER_STACK;
+ struct perf_callchain_entry *trace;
+ bool kernel = !user;
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+ trace = get_perf_callchain(regs, init_nr, kernel, user,
+ sysctl_perf_event_max_stack, false, false);
+
+ if (unlikely(!trace))
+ /* couldn't fetch the stack trace */
+ return -EFAULT;
+
+ return __bpf_get_stackid(map, trace, flags);
+}
+
const struct bpf_func_proto bpf_get_stackid_proto = {
.func = bpf_get_stackid,
.gpl_only = true,
@@ -487,7 +499,77 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
.arg3_type = ARG_ANYTHING,
};
+/* Count the callchain entries that precede the first PERF_CONTEXT_USER
+ * marker; returns trace->nr when the marker is absent.
+ */
+static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
+{
+	__u64 idx;
+
+	for (idx = 0; idx < trace->nr; idx++) {
+		if (trace->ip[idx] == PERF_CONTEXT_USER)
+			break;
+	}
+
+	return idx;
+}
+
+/* perf_event flavour of bpf_get_stackid(). When the event's sample_type has
+ * __PERF_SAMPLE_CALLCHAIN_EARLY set, the callchain already attached to the
+ * sample (ctx->data->callchain) is used instead of unwinding again;
+ * otherwise this falls back to bpf_get_stackid() on ctx->regs.
+ *
+ * Returns -EINVAL on unsupported flag bits, -EFAULT when the sample carries
+ * no callchain or the adjusted skip count no longer fits in
+ * BPF_F_SKIP_FIELD_MASK, otherwise the result of __bpf_get_stackid().
+ */
+BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
+ struct bpf_map *, map, u64, flags)
+{
+ struct perf_event *event = ctx->event;
+ struct perf_callchain_entry *trace;
+ bool kernel, user;
+ __u64 nr_kernel;
+ int ret;
+
+ /* perf_sample_data doesn't have callchain, use bpf_get_stackid */
+ if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ return bpf_get_stackid((unsigned long)(ctx->regs),
+ (unsigned long) map, flags, 0, 0);
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
+ return -EINVAL;
+
+ user = flags & BPF_F_USER_STACK;
+ kernel = !user;
+
+ trace = ctx->data->callchain;
+ if (unlikely(!trace))
+ return -EFAULT;
+
+ /* number of entries before the PERF_CONTEXT_USER marker */
+ nr_kernel = count_kernel_ip(trace);
+
+ if (kernel) {
+ __u64 nr = trace->nr;
+
+ /* temporarily truncate the chain so only kernel entries are seen */
+ trace->nr = nr_kernel;
+ ret = __bpf_get_stackid(map, trace, flags);
+
+ /* restore nr */
+ trace->nr = nr;
+ } else { /* user */
+ u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+ /* skip the kernel entries on top of the caller-requested skip */
+ skip += nr_kernel;
+ if (skip > BPF_F_SKIP_FIELD_MASK)
+ return -EFAULT;
+
+ flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+ ret = __bpf_get_stackid(map, trace, flags);
+ }
+ return ret;
+}
+
+/* Helper prototype for bpf_get_stackid_pe(): (ctx, const map pointer, flags)
+ * returning an integer.
+ * NOTE(review): gpl_only is false here, while bpf_get_stackid_proto in this
+ * file sets it to true - confirm the difference is intentional.
+ */
+const struct bpf_func_proto bpf_get_stackid_proto_pe = {
+ .func = bpf_get_stackid_pe,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+ struct perf_callchain_entry *trace_in,
void *buf, u32 size, u64 flags)
{
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
@@ -520,7 +602,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
else
init_nr = sysctl_perf_event_max_stack - num_elem;
- if (kernel && task)
+ if (trace_in)
+ trace = trace_in;
+ else if (kernel && task)
trace = get_callchain_entry_for_task(task, init_nr);
else
trace = get_perf_callchain(regs, init_nr, kernel, user,
@@ -556,7 +640,7 @@ clear:
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
- return __bpf_get_stack(regs, NULL, buf, size, flags);
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
}
const struct bpf_func_proto bpf_get_stack_proto = {
@@ -574,7 +658,7 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
{
struct pt_regs *regs = task_pt_regs(task);
- return __bpf_get_stack(regs, task, buf, size, flags);
+ return __bpf_get_stack(regs, task, NULL, buf, size, flags);
}
BTF_ID_LIST(bpf_get_task_stack_btf_ids)
@@ -591,6 +675,69 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
.btf_id = bpf_get_task_stack_btf_ids,
};
+/* perf_event flavour of bpf_get_stack(). When the event's sample_type has
+ * __PERF_SAMPLE_CALLCHAIN_EARLY set, the callchain attached to the sample
+ * (ctx->data->callchain) is passed to __bpf_get_stack() as trace_in;
+ * otherwise __bpf_get_stack() is called without a pre-collected trace.
+ *
+ * On the error paths taken through the "clear" label, @buf is zeroed for
+ * @size bytes and a negative errno is returned (-EINVAL for unsupported flag
+ * bits, -EFAULT for a missing callchain or a skip count that overflows
+ * BPF_F_SKIP_FIELD_MASK).
+ */
+BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
+ void *, buf, u32, size, u64, flags)
+{
+ struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
+ struct perf_event *event = ctx->event;
+ struct perf_callchain_entry *trace;
+ bool kernel, user;
+ int err = -EINVAL;
+ __u64 nr_kernel;
+
+ if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+
+ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+ BPF_F_USER_BUILD_ID)))
+ goto clear;
+
+ user = flags & BPF_F_USER_STACK;
+ kernel = !user;
+
+ /* every failure from here on reports -EFAULT */
+ err = -EFAULT;
+ trace = ctx->data->callchain;
+ if (unlikely(!trace))
+ goto clear;
+
+ /* number of entries before the PERF_CONTEXT_USER marker */
+ nr_kernel = count_kernel_ip(trace);
+
+ if (kernel) {
+ __u64 nr = trace->nr;
+
+ /* temporarily truncate the chain so only kernel entries are seen */
+ trace->nr = nr_kernel;
+ err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+
+ /* restore nr */
+ trace->nr = nr;
+ } else { /* user */
+ u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
+
+ /* skip the kernel entries on top of the caller-requested skip */
+ skip += nr_kernel;
+ if (skip > BPF_F_SKIP_FIELD_MASK)
+ goto clear;
+
+ flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
+ err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+ }
+ return err;
+
+clear:
+ memset(buf, 0, size);
+ return err;
+
+}
+
+const struct bpf_func_proto bpf_get_stack_proto_pe = {
+ .func = bpf_get_stack_pe,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+};
+
/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d07417d17712..2f343ce15747 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2824,6 +2824,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_TRACING;
case BPF_SK_LOOKUP:
return BPF_PROG_TYPE_SK_LOOKUP;
+ case BPF_XDP:
+ return BPF_PROG_TYPE_XDP;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -3044,6 +3046,25 @@ again:
return map;
}
+/* Starting at *id, find the next program in prog_idr on which a reference
+ * can be taken. Entries whose refcount could not be raised are skipped by
+ * advancing *id and retrying. Runs under prog_idr_lock; returns the
+ * referenced program, or NULL when idr_get_next() finds nothing more.
+ */
+struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
+{
+	struct bpf_prog *found;
+
+	spin_lock_bh(&prog_idr_lock);
+	for (;;) {
+		found = idr_get_next(&prog_idr, id);
+		if (!found)
+			break;
+
+		found = bpf_prog_inc_not_zero(found);
+		if (!IS_ERR(found))
+			break;
+
+		/* could not take a reference on this one, try the next id */
+		(*id)++;
+	}
+	spin_unlock_bh(&prog_idr_lock);
+
+	return found;
+}
+
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
struct bpf_prog *bpf_prog_by_id(u32 id)
@@ -3902,6 +3923,11 @@ static int link_create(union bpf_attr *attr)
case BPF_PROG_TYPE_SK_LOOKUP:
ret = netns_bpf_link_create(attr, prog);
break;
+#ifdef CONFIG_NET
+ case BPF_PROG_TYPE_XDP:
+ ret = bpf_xdp_link_attach(attr, prog);
+ break;
+#endif
default:
ret = -EINVAL;
}
@@ -3965,6 +3991,29 @@ out_put_link:
return ret;
}
+#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
+
+/* BPF_LINK_DETACH command: resolve the link from attr->link_detach.link_fd
+ * and invoke its ->detach() callback.
+ *
+ * Returns -EINVAL for malformed attributes, the lookup error if the fd does
+ * not resolve to a link, -EOPNOTSUPP when the link type has no ->detach(),
+ * otherwise the callback's return value. The reference taken by the lookup
+ * is always dropped.
+ */
+static int link_detach(union bpf_attr *attr)
+{
+	struct bpf_link *lnk;
+	int err;
+
+	if (CHECK_ATTR(BPF_LINK_DETACH))
+		return -EINVAL;
+
+	lnk = bpf_link_get_from_fd(attr->link_detach.link_fd);
+	if (IS_ERR(lnk))
+		return PTR_ERR(lnk);
+
+	err = lnk->ops->detach ? lnk->ops->detach(lnk) : -EOPNOTSUPP;
+
+	bpf_link_put(lnk);
+	return err;
+}
+
static int bpf_link_inc_not_zero(struct bpf_link *link)
{
return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT;
@@ -4214,6 +4263,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_ITER_CREATE:
err = bpf_iter_create(&attr);
break;
+ case BPF_LINK_DETACH:
+ err = link_detach(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 2feecf095609..232df29793e9 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -51,7 +51,8 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
if (!task)
return NULL;
- ++*pos;
+ if (*pos == 0)
+ ++*pos;
return task;
}
@@ -210,7 +211,8 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
return NULL;
}
- ++*pos;
+ if (*pos == 0)
+ ++*pos;
info->task = task;
info->files = files;
@@ -291,7 +293,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
}
}
-static int init_seq_pidns(void *priv_data)
+static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
struct bpf_iter_seq_task_common *common = priv_data;
@@ -317,25 +319,32 @@ BTF_ID_LIST(btf_task_file_ids)
BTF_ID(struct, task_struct)
BTF_ID(struct, file)
-static struct bpf_iter_reg task_reg_info = {
- .target = "task",
+static const struct bpf_iter_seq_info task_seq_info = {
.seq_ops = &task_seq_ops,
.init_seq_private = init_seq_pidns,
.fini_seq_private = fini_seq_pidns,
.seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
+};
+
+static struct bpf_iter_reg task_reg_info = {
+ .target = "task",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__task, task),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &task_seq_info,
};
-static struct bpf_iter_reg task_file_reg_info = {
- .target = "task_file",
+static const struct bpf_iter_seq_info task_file_seq_info = {
.seq_ops = &task_file_seq_ops,
.init_seq_private = init_seq_pidns,
.fini_seq_private = fini_seq_pidns,
.seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
+};
+
+static struct bpf_iter_reg task_file_reg_info = {
+ .target = "task_file",
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__task_file, task),
@@ -343,6 +352,7 @@ static struct bpf_iter_reg task_file_reg_info = {
{ offsetof(struct bpf_iter__task_file, file),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &task_file_seq_info,
};
static int __init task_iter_init(void)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9a6703bc3f36..b6ccfce3bf4c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -409,7 +409,9 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_SOCK_COMMON_OR_NULL ||
type == PTR_TO_TCP_SOCK_OR_NULL ||
type == PTR_TO_BTF_ID_OR_NULL ||
- type == PTR_TO_MEM_OR_NULL;
+ type == PTR_TO_MEM_OR_NULL ||
+ type == PTR_TO_RDONLY_BUF_OR_NULL ||
+ type == PTR_TO_RDWR_BUF_OR_NULL;
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -503,6 +505,10 @@ static const char * const reg_type_str[] = {
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
+ [PTR_TO_RDONLY_BUF] = "rdonly_buf",
+ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
+ [PTR_TO_RDWR_BUF] = "rdwr_buf",
+ [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
};
static char slot_type_char[] = {
@@ -2173,6 +2179,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID_OR_NULL:
+ case PTR_TO_RDONLY_BUF:
+ case PTR_TO_RDONLY_BUF_OR_NULL:
+ case PTR_TO_RDWR_BUF:
+ case PTR_TO_RDWR_BUF_OR_NULL:
return true;
default:
return false;
@@ -3052,14 +3062,15 @@ int check_ctx_reg(struct bpf_verifier_env *env,
return 0;
}
-static int check_tp_buffer_access(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg,
- int regno, int off, int size)
+static int __check_buffer_access(struct bpf_verifier_env *env,
+ const char *buf_info,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size)
{
if (off < 0) {
verbose(env,
- "R%d invalid tracepoint buffer access: off=%d, size=%d",
- regno, off, size);
+ "R%d invalid %s buffer access: off=%d, size=%d\n",
+ regno, buf_info, off, size);
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
@@ -3067,16 +3078,49 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env,
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env,
- "R%d invalid variable buffer offset: off=%d, var_off=%s",
+ "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
regno, off, tn_buf);
return -EACCES;
}
+
+ return 0;
+}
+
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size)
+{
+ int err;
+
+ err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
+ if (err)
+ return err;
+
if (off + size > env->prog->aux->max_tp_access)
env->prog->aux->max_tp_access = off + size;
return 0;
}
+/* Access check used for PTR_TO_RDONLY_BUF / PTR_TO_RDWR_BUF registers:
+ * validate the offset through __check_buffer_access() and, on success, grow
+ * *max_access so that it covers off + size. @buf_info only appears in the
+ * verifier log message; @zero_size_allowed is accepted but not consulted in
+ * this function.
+ */
+static int check_buffer_access(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size,
+ bool zero_size_allowed,
+ const char *buf_info,
+ u32 *max_access)
+{
+ int err;
+
+ err = __check_buffer_access(env, buf_info, reg, regno, off, size);
+ if (err)
+ return err;
+
+ /* remember the furthest byte the program touches in this buffer */
+ if (off + size > *max_access)
+ *max_access = off + size;
+
+ return 0;
+}
+
/* BPF architecture zero extends alu32 ops into 64-bit registesr */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
@@ -3427,6 +3471,23 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else if (reg->type == CONST_PTR_TO_MAP) {
err = check_ptr_to_map_access(env, regs, regno, off, size, t,
value_regno);
+ } else if (reg->type == PTR_TO_RDONLY_BUF) {
+ if (t == BPF_WRITE) {
+ verbose(env, "R%d cannot write into %s\n",
+ regno, reg_type_str[reg->type]);
+ return -EACCES;
+ }
+ err = check_buffer_access(env, reg, regno, off, size, false,
+ "rdonly",
+ &env->prog->aux->max_rdonly_access);
+ if (!err && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_RDWR_BUF) {
+ err = check_buffer_access(env, reg, regno, off, size, false,
+ "rdwr",
+ &env->prog->aux->max_rdwr_access);
+ if (!err && t == BPF_READ && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -3668,6 +3729,18 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_mem_region_access(env, regno, reg->off,
access_size, reg->mem_size,
zero_size_allowed);
+ case PTR_TO_RDONLY_BUF:
+ if (meta && meta->raw_mode)
+ return -EACCES;
+ return check_buffer_access(env, reg, regno, reg->off,
+ access_size, zero_size_allowed,
+ "rdonly",
+ &env->prog->aux->max_rdonly_access);
+ case PTR_TO_RDWR_BUF:
+ return check_buffer_access(env, reg, regno, reg->off,
+ access_size, zero_size_allowed,
+ "rdwr",
+ &env->prog->aux->max_rdwr_access);
default: /* scalar_value|ptr_to_stack or invalid ptr */
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
@@ -3933,6 +4006,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
else if (!type_is_pkt_pointer(type) &&
type != PTR_TO_MAP_VALUE &&
type != PTR_TO_MEM &&
+ type != PTR_TO_RDONLY_BUF &&
+ type != PTR_TO_RDWR_BUF &&
type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
@@ -4887,6 +4962,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
env->prog->has_callchain_buf = true;
}
+ if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
+ env->prog->call_get_stack = true;
+
if (changes_data)
clear_all_pkt_pointers(env);
return 0;
@@ -6806,6 +6884,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
reg->type = PTR_TO_BTF_ID;
} else if (reg->type == PTR_TO_MEM_OR_NULL) {
reg->type = PTR_TO_MEM;
+ } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
+ reg->type = PTR_TO_RDONLY_BUF;
+ } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
+ reg->type = PTR_TO_RDWR_BUF;
}
if (is_null) {
/* We don't need id and ref_obj_id from this point
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 856d98c36f56..ddcfd2fb5cc5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9544,6 +9544,24 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
if (IS_ERR(prog))
return PTR_ERR(prog);
+ if (event->attr.precise_ip &&
+ prog->call_get_stack &&
+ (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
+ event->attr.exclude_callchain_kernel ||
+ event->attr.exclude_callchain_user)) {
+ /*
+ * On perf_event with precise_ip, calling bpf_get_stack()
+ * may trigger unwinder warnings and occasional crashes.
+ * bpf_get_[stack|stackid] works around this issue by using
+ * callchain attached to perf_sample_data. If the
+ * perf_event does not have full (kernel and user) callchain
+ * attached to perf_sample_data, do not allow attaching BPF
+ * program that calls bpf_get_[stack|stackid].
+ */
+ bpf_prog_put(prog);
+ return -EPROTO;
+ }
+
event->prog = prog;
event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3cc0dcb60ca2..cb91ef902cc4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1411,9 +1411,9 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto_tp;
case BPF_FUNC_get_stackid:
- return &bpf_get_stackid_proto_tp;
+ return &bpf_get_stackid_proto_pe;
case BPF_FUNC_get_stack:
- return &bpf_get_stack_proto_tp;
+ return &bpf_get_stack_proto_pe;
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
case BPF_FUNC_read_branch_records:
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b03c469cd01f..99eb8c6c0fbc 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -327,6 +327,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* priority is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
+ offsetof(struct __sk_buff, ifindex)))
+ return -EINVAL;
+
+ /* ifindex is allowed */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
offsetof(struct __sk_buff, cb)))
return -EINVAL;
@@ -381,6 +387,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
__skb->mark = skb->mark;
__skb->priority = skb->priority;
+ __skb->ifindex = skb->dev->ifindex;
__skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
__skb->wire_len = cb->pkt_len;
@@ -391,6 +398,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
bool is_l2 = false, is_direct_pkt_access = false;
+ struct net *net = current->nsproxy->net_ns;
+ struct net_device *dev = net->loopback_dev;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct __sk_buff *ctx = NULL;
@@ -432,7 +441,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
kfree(ctx);
return -ENOMEM;
}
- sock_net_set(sk, current->nsproxy->net_ns);
+ sock_net_set(sk, net);
sock_init_data(NULL, sk);
skb = build_skb(data, 0);
@@ -446,9 +455,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
- skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+ if (ctx && ctx->ifindex > 1) {
+ dev = dev_get_by_index(net, ctx->ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
+ skb->protocol = eth_type_trans(skb, dev);
skb_reset_network_header(skb);
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ sk->sk_family = AF_INET;
+ if (sizeof(struct iphdr) <= skb_headlen(skb)) {
+ sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
+ sk->sk_daddr = ip_hdr(skb)->daddr;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ sk->sk_family = AF_INET6;
+ if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
+ sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
+ sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
if (is_l2)
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
@@ -481,6 +518,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
out:
+ if (dev && dev != net->loopback_dev)
+ dev_put(dev);
kfree_skb(skb);
bpf_sk_storage_free(sk);
kfree(sk);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 6f921c4ddc2c..d3377c90a291 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
@@ -943,6 +944,16 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
+ .func = bpf_sk_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.func = bpf_sk_storage_delete,
.gpl_only = false,
@@ -1217,3 +1228,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
+
+/* Per-seq_file private state of the sk_storage map iterator. */
+struct bpf_iter_seq_sk_storage_map_info {
+ struct bpf_map *map; /* map being walked, taken from aux->map at init */
+ unsigned int bucket_id; /* index into smap->buckets of the current bucket */
+ unsigned skip_elems; /* position within that bucket to resume from */
+};
+
+/* Find the next element to show, starting after @prev_selem (or from the
+ * position saved in @info when @prev_selem is NULL).
+ *
+ * When an element is returned, the lock of the bucket it lives in is left
+ * held and info->bucket_id / info->skip_elems describe its position. When
+ * NULL is returned no bucket lock is held by this function.
+ * NOTE(review): a non-NULL @prev_selem is presumably an element returned by
+ * an earlier call, i.e. its bucket lock is still held - confirm with callers.
+ */
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
+ struct bpf_sk_storage_elem *prev_selem)
+{
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ u32 skip_elems = info->skip_elems;
+ struct bpf_sk_storage_map *smap;
+ u32 bucket_id = info->bucket_id;
+ u32 i, count, n_buckets;
+ struct bucket *b;
+
+ smap = (struct bpf_sk_storage_map *)info->map;
+ n_buckets = 1U << smap->bucket_log;
+ /* saved position is already past the last bucket: nothing left */
+ if (bucket_id >= n_buckets)
+ return NULL;
+
+ /* try to find next selem in the same bucket */
+ selem = prev_selem;
+ count = 0;
+ while (selem) {
+ selem = hlist_entry_safe(selem->map_node.next,
+ struct bpf_sk_storage_elem, map_node);
+ if (!selem) {
+ /* not found, unlock and go to the next bucket */
+ b = &smap->buckets[bucket_id++];
+ raw_spin_unlock_bh(&b->lock);
+ skip_elems = 0;
+ break;
+ }
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ if (sk_storage) {
+ info->skip_elems = skip_elems + count;
+ return selem;
+ }
+ count++;
+ }
+
+ /* scan the remaining buckets, locking each one while it is examined */
+ for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
+ b = &smap->buckets[i];
+ raw_spin_lock_bh(&b->lock);
+ count = 0;
+ hlist_for_each_entry(selem, &b->list, map_node) {
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ if (sk_storage && count >= skip_elems) {
+ /* return with b->lock still held */
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return selem;
+ }
+ count++;
+ }
+ raw_spin_unlock_bh(&b->lock);
+ skip_elems = 0;
+ }
+
+ /* ran off the end: record that so later calls return NULL early */
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+}
+
+/* seq_file ->start(): resume from the position saved in the private state.
+ * Returns the element to show, or NULL when the walk is finished.
+ */
+static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_sk_storage_elem *first;
+
+	first = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
+
+	/* Only a successful start at position 0 moves the position forward. */
+	if (first && *pos == 0)
+		++*pos;
+
+	return first;
+}
+
+/* seq_file ->next(): account for the element just shown, then continue the
+ * search after it.
+ */
+static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
+					 loff_t *pos)
+{
+	struct bpf_iter_seq_sk_storage_map_info *priv = seq->private;
+
+	*pos += 1;
+	priv->skip_elems += 1;
+
+	return bpf_sk_storage_map_seq_find_next(priv, v);
+}
+
+struct bpf_iter__bpf_sk_storage_map {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(struct sock *, sk);
+ __bpf_md_ptr(void *, value);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
+ struct bpf_map *map, struct sock *sk,
+ void *value)
+
+/* Run the iterator program for @selem. A NULL @selem marks the final
+ * (in-stop) invocation, in which case the sk and value context fields stay
+ * zeroed. Returns the program's result, or 0 when bpf_iter_get_info() yields
+ * no program.
+ */
+static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
+					 struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_iter_seq_sk_storage_map_info *priv = seq->private;
+	struct bpf_iter__bpf_sk_storage_map ctx = {};
+	struct bpf_iter_meta meta;
+	struct bpf_prog *iter_prog;
+
+	meta.seq = seq;
+	iter_prog = bpf_iter_get_info(&meta, selem == NULL);
+	if (!iter_prog)
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.map = priv->map;
+	if (selem) {
+		struct bpf_sk_storage *storage;
+
+		storage = rcu_dereference_raw(selem->sk_storage);
+		ctx.sk = storage->sk;
+		ctx.value = SDATA(selem)->data;
+	}
+
+	return bpf_iter_run_prog(iter_prog, &ctx);
+}
+
+static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_sk_storage_map_seq_show(seq, v);
+}
+
+/* seq_file ->stop(): without a current element, give the iterator program
+ * its final call; with one, release the lock of the bucket recorded in the
+ * private state.
+ */
+static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_seq_sk_storage_map_info *priv = seq->private;
+	struct bpf_sk_storage_map *storage_map;
+
+	if (!v) {
+		(void)__bpf_sk_storage_map_seq_show(seq, v);
+		return;
+	}
+
+	storage_map = (struct bpf_sk_storage_map *)priv->map;
+	raw_spin_unlock_bh(&storage_map->buckets[priv->bucket_id].lock);
+}
+
+/* ->init_seq_private callback: remember which map (aux->map) this seq_file
+ * iterates over. Always returns 0.
+ */
+static int bpf_iter_init_sk_storage_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
+ seq_info->map = aux->map;
+ return 0;
+}
+
+/* ->check_target callback of the "bpf_sk_storage_map" iterator.
+ *
+ * Returns -EINVAL unless the map is a BPF_MAP_TYPE_SK_STORAGE map, -EACCES
+ * when the program accesses the value buffer beyond map->value_size, and 0
+ * otherwise.
+ */
+static int bpf_iter_check_map(struct bpf_prog *prog,
+			      struct bpf_iter_aux_info *aux)
+{
+	struct bpf_map *map = aux->map;
+
+	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+		return -EINVAL;
+
+	/* The value context argument of this target is registered as
+	 * PTR_TO_RDWR_BUF_OR_NULL, and the verifier tracks accesses through
+	 * such pointers in max_rdwr_access (max_rdonly_access is only updated
+	 * for PTR_TO_RDONLY_BUF), so that is the bound to validate here.
+	 */
+	if (prog->aux->max_rdwr_access > map->value_size)
+		return -EACCES;
+
+	return 0;
+}
+
+static const struct seq_operations bpf_sk_storage_map_seq_ops = {
+ .start = bpf_sk_storage_map_seq_start,
+ .next = bpf_sk_storage_map_seq_next,
+ .stop = bpf_sk_storage_map_seq_stop,
+ .show = bpf_sk_storage_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_sk_storage_map_seq_ops,
+ .init_seq_private = bpf_iter_init_sk_storage_map,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
+};
+
+static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
+ .target = "bpf_sk_storage_map",
+ .check_target = bpf_iter_check_map,
+ .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
+ PTR_TO_RDWR_BUF_OR_NULL },
+ },
+ .seq_info = &iter_seq_info,
+};
+
+/* Initcall: set the BTF id of the first context argument (the sk pointer) to
+ * btf_sock_ids[BTF_SOCK_TYPE_SOCK], then register the "bpf_sk_storage_map"
+ * iterator target.
+ */
+static int __init bpf_sk_storage_map_iter_init(void)
+{
+ bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
+ btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+ return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
+}
+late_initcall(bpf_sk_storage_map_iter_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index f7ef0f5c5569..7df6c9617321 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5467,10 +5467,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
}
break;
- case XDP_QUERY_PROG:
- xdp->prog_id = old ? old->aux->id : 0;
- break;
-
default:
ret = -EINVAL;
break;
@@ -8740,189 +8736,464 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
}
EXPORT_SYMBOL(dev_change_proto_down_reason);
-u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
- enum bpf_netdev_command cmd)
+struct bpf_xdp_link {
+ struct bpf_link link;
+ struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
+ int flags;
+};
+
+static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
{
- struct netdev_bpf xdp;
+ if (flags & XDP_FLAGS_HW_MODE)
+ return XDP_MODE_HW;
+ if (flags & XDP_FLAGS_DRV_MODE)
+ return XDP_MODE_DRV;
+ return XDP_MODE_SKB;
+}
- if (!bpf_op)
- return 0;
+/* Pick the install callback for @mode: generic_xdp_install for SKB mode, the
+ * driver's ndo_bpf (which may be NULL) for DRV and HW modes, and NULL for
+ * any other value.
+ */
+static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
+{
+	switch (mode) {
+	case XDP_MODE_SKB:
+		return generic_xdp_install;
+	case XDP_MODE_DRV:
+	case XDP_MODE_HW:
+		return dev->netdev_ops->ndo_bpf;
+	default:
+		return NULL;
+	}
+}
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = cmd;
+/* Return the bpf_xdp_link recorded in @dev's xdp_state slot for @mode. */
+static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+{
+ return dev->xdp_state[mode].link;
+}
+
+/* Return the program attached to @dev in @mode: the link's program when a
+ * link occupies the slot, otherwise the slot's own prog pointer.
+ */
+static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
+				     enum bpf_xdp_mode mode)
+{
+	struct bpf_xdp_link *xdp_link = dev_xdp_link(dev, mode);
+
+	return xdp_link ? xdp_link->link.prog : dev->xdp_state[mode].prog;
+}
+
+u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
+{
+ struct bpf_prog *prog = dev_xdp_prog(dev, mode);
- /* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
+ return prog ? prog->aux->id : 0;
+}
- return xdp.prog_id;
+static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_xdp_link *link)
+{
+ dev->xdp_state[mode].link = link;
+ dev->xdp_state[mode].prog = NULL;
}
-static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
- struct netlink_ext_ack *extack, u32 flags,
- struct bpf_prog *prog)
+/* Record @prog as the directly-attached program for @mode and clear the
+ * slot's link pointer.
+ */
+static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_prog *prog)
+{
+ dev->xdp_state[mode].link = NULL;
+ dev->xdp_state[mode].prog = prog;
+}
+
+static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
+ bpf_op_t bpf_op, struct netlink_ext_ack *extack,
+ u32 flags, struct bpf_prog *prog)
{
- bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
- struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
int err;
- if (non_hw) {
- prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
- XDP_QUERY_PROG));
- if (IS_ERR(prev_prog))
- prev_prog = NULL;
- }
-
memset(&xdp, 0, sizeof(xdp));
- if (flags & XDP_FLAGS_HW_MODE)
- xdp.command = XDP_SETUP_PROG_HW;
- else
- xdp.command = XDP_SETUP_PROG;
+ xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
xdp.flags = flags;
xdp.prog = prog;
+ /* Drivers assume refcnt is already incremented (i.e., prog pointer is
+ * "moved" into driver), so they don't increment it on their own, but
+ * they do decrement refcnt when program is detached or replaced.
+ * Given net_device also owns link/prog, we need to bump refcnt here
+ * to prevent drivers from underflowing it.
+ */
+ if (prog)
+ bpf_prog_inc(prog);
err = bpf_op(dev, &xdp);
- if (!err && non_hw)
- bpf_prog_change_xdp(prev_prog, prog);
+ if (err) {
+ if (prog)
+ bpf_prog_put(prog);
+ return err;
+ }
- if (prev_prog)
- bpf_prog_put(prev_prog);
+ if (mode != XDP_MODE_HW)
+ bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
- return err;
+ return 0;
}
static void dev_xdp_uninstall(struct net_device *dev)
{
- struct netdev_bpf xdp;
- bpf_op_t ndo_bpf;
+ struct bpf_xdp_link *link;
+ struct bpf_prog *prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
- /* Remove generic XDP */
- WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+ ASSERT_RTNL();
- /* Remove from the driver */
- ndo_bpf = dev->netdev_ops->ndo_bpf;
- if (!ndo_bpf)
- return;
+ for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
+ prog = dev_xdp_prog(dev, mode);
+ if (!prog)
+ continue;
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
- WARN_ON(ndo_bpf(dev, &xdp));
- if (xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op)
+ continue;
- /* Remove HW offload */
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG_HW;
- if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+
+ /* auto-detach link from net device */
+ link = dev_xdp_link(dev, mode);
+ if (link)
+ link->dev = NULL;
+ else
+ bpf_prog_put(prog);
+
+ dev_xdp_set_link(dev, mode, NULL);
+ }
}
-/**
- * dev_change_xdp_fd - set or clear a bpf program for a device rx path
- * @dev: device
- * @extack: netlink extended ack
- * @fd: new program fd or negative value to clear
- * @expected_fd: old program fd that userspace expects to replace or clear
- * @flags: xdp-related flags
- *
- * Set or clear a bpf program for a device
- */
-int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
- int fd, int expected_fd, u32 flags)
+static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog, u32 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
- enum bpf_netdev_command query;
- u32 prog_id, expected_id = 0;
- bpf_op_t bpf_op, bpf_chk;
- struct bpf_prog *prog;
- bool offload;
+ struct bpf_prog *cur_prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
int err;
ASSERT_RTNL();
- offload = flags & XDP_FLAGS_HW_MODE;
- query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+ /* either link or prog attachment, never both */
+ if (link && (new_prog || old_prog))
+ return -EINVAL;
+ /* link supports only XDP mode flags */
+ if (link && (flags & ~XDP_FLAGS_MODES)) {
+ NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
+ return -EINVAL;
+ }
+ /* just one XDP mode bit should be set, zero defaults to SKB mode */
+ if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
+ NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
+ return -EINVAL;
+ }
+ /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
+ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
+ return -EINVAL;
+ }
- bpf_op = bpf_chk = ops->ndo_bpf;
- if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
- NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
- return -EOPNOTSUPP;
+ mode = dev_xdp_mode(flags);
+ /* can't replace attached link */
+ if (dev_xdp_link(dev, mode)) {
+ NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
+ return -EBUSY;
}
- if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
- bpf_op = generic_xdp_install;
- if (bpf_op == bpf_chk)
- bpf_chk = generic_xdp_install;
-
- prog_id = __dev_xdp_query(dev, bpf_op, query);
- if (flags & XDP_FLAGS_REPLACE) {
- if (expected_fd >= 0) {
- prog = bpf_prog_get_type_dev(expected_fd,
- BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
- expected_id = prog->aux->id;
- bpf_prog_put(prog);
- }
- if (prog_id != expected_id) {
- NL_SET_ERR_MSG(extack, "Active program does not match expected");
- return -EEXIST;
- }
+ cur_prog = dev_xdp_prog(dev, mode);
+ /* can't replace attached prog with link */
+ if (link && cur_prog) {
+ NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
+ return -EBUSY;
+ }
+ if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
+ NL_SET_ERR_MSG(extack, "Active program does not match expected");
+ return -EEXIST;
+ }
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
+ NL_SET_ERR_MSG(extack, "XDP program already attached");
+ return -EBUSY;
}
- if (fd >= 0) {
- if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
- NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
- return -EEXIST;
- }
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
- NL_SET_ERR_MSG(extack, "XDP program already attached");
- return -EBUSY;
- }
+ /* put effective new program into new_prog */
+ if (link)
+ new_prog = link->link.prog;
- prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ if (new_prog) {
+ bool offload = mode == XDP_MODE_HW;
+ enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
+ ? XDP_MODE_DRV : XDP_MODE_SKB;
- if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
- NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
- bpf_prog_put(prog);
+ if (!offload && dev_xdp_prog(dev, other_mode)) {
+ NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
+ return -EEXIST;
+ }
+ if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
+ NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
return -EINVAL;
}
-
- if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+ if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
- bpf_prog_put(prog);
return -EINVAL;
}
-
- if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
- NL_SET_ERR_MSG(extack,
- "BPF_XDP_CPUMAP programs can not be attached to a device");
- bpf_prog_put(prog);
+ if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
+ NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
return -EINVAL;
}
+ }
- /* prog->aux->id may be 0 for orphaned device-bound progs */
- if (prog->aux->id && prog->aux->id == prog_id) {
- bpf_prog_put(prog);
- return 0;
+ /* don't call drivers if the effective program didn't change */
+ if (new_prog != cur_prog) {
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op) {
+ NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
+ return -EOPNOTSUPP;
+ }
+
+ err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
+ if (err)
+ return err;
+ }
+
+ if (link)
+ dev_xdp_set_link(dev, mode, link);
+ else
+ dev_xdp_set_prog(dev, mode, new_prog);
+ if (cur_prog)
+ bpf_prog_put(cur_prog);
+
+ return 0;
+}
+
+static int dev_xdp_attach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+{
+ return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
+}
+
+static int dev_xdp_detach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+{
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+
+ ASSERT_RTNL();
+
+ mode = dev_xdp_mode(link->flags);
+ if (dev_xdp_link(dev, mode) != link)
+ return -EINVAL;
+
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+ dev_xdp_set_link(dev, mode, NULL);
+ return 0;
+}
+
+static void bpf_xdp_link_release(struct bpf_link *link)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ rtnl_lock();
+
+ /* if racing with net_device's tear down, xdp_link->dev might be
+ * already NULL, in which case link was already auto-detached
+ */
+ if (xdp_link->dev) {
+ WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+ xdp_link->dev = NULL;
+ }
+
+ rtnl_unlock();
+}
+
+static int bpf_xdp_link_detach(struct bpf_link *link)
+{
+ bpf_xdp_link_release(link);
+ return 0;
+}
+
+static void bpf_xdp_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ kfree(xdp_link);
+}
+
+static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ seq_printf(seq, "ifindex:\t%u\n", ifindex);
+}
+
+static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ info->xdp.ifindex = ifindex;
+ return 0;
+}
+
+static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+ int err = 0;
+
+ rtnl_lock();
+
+ /* link might have been auto-released already, so fail */
+ if (!xdp_link->dev) {
+ err = -ENOLINK;
+ goto out_unlock;
+ }
+
+ if (old_prog && link->prog != old_prog) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ old_prog = link->prog;
+ if (old_prog == new_prog) {
+ /* no-op, don't disturb drivers */
+ bpf_prog_put(new_prog);
+ goto out_unlock;
+ }
+
+ mode = dev_xdp_mode(xdp_link->flags);
+ bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
+ err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
+ xdp_link->flags, new_prog);
+ if (err)
+ goto out_unlock;
+
+ old_prog = xchg(&link->prog, new_prog);
+ bpf_prog_put(old_prog);
+
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static const struct bpf_link_ops bpf_xdp_link_lops = {
+ .release = bpf_xdp_link_release,
+ .dealloc = bpf_xdp_link_dealloc,
+ .detach = bpf_xdp_link_detach,
+ .show_fdinfo = bpf_xdp_link_show_fdinfo,
+ .fill_link_info = bpf_xdp_link_fill_link_info,
+ .update_prog = bpf_xdp_link_update,
+};
+
+int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_link_primer link_primer;
+ struct bpf_xdp_link *link;
+ struct net_device *dev;
+ int err, fd;
+
+ dev = dev_get_by_index(net, attr->link_create.target_ifindex);
+ if (!dev)
+ return -EINVAL;
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_dev;
+ }
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ link->dev = dev;
+ link->flags = attr->link_create.flags;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ goto out_put_dev;
+ }
+
+ rtnl_lock();
+ err = dev_xdp_attach_link(dev, NULL, link);
+ rtnl_unlock();
+
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ goto out_put_dev;
+ }
+
+ fd = bpf_link_settle(&link_primer);
+ /* link itself doesn't hold dev's refcnt to not complicate shutdown */
+ dev_put(dev);
+ return fd;
+
+out_put_dev:
+ dev_put(dev);
+ return err;
+}
+
+/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @extack: netlink extended ack
+ * @fd: new program fd or negative value to clear
+ * @expected_fd: old program fd that userspace expects to replace or clear
+ * @flags: xdp-related flags
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags)
+{
+ enum bpf_xdp_mode mode = dev_xdp_mode(flags);
+ struct bpf_prog *new_prog = NULL, *old_prog = NULL;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (fd >= 0) {
+ new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(new_prog))
+ return PTR_ERR(new_prog);
+ }
+
+ if (expected_fd >= 0) {
+ old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(old_prog)) {
+ err = PTR_ERR(old_prog);
+ old_prog = NULL;
+ goto err_out;
}
- } else {
- if (!prog_id)
- return 0;
- prog = NULL;
}
- err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
- if (err < 0 && prog)
- bpf_prog_put(prog);
+ err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
+err_out:
+ if (err && new_prog)
+ bpf_prog_put(new_prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
return err;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 29e3455122f7..7124f0fe6974 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6187,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func)
}
const struct bpf_func_proto bpf_event_output_data_proto __weak;
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -6219,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_cg_sock_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a54c3e0f2ee1..68e0682450c6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1426,13 +1426,12 @@ static u32 rtnl_xdp_prog_skb(struct net_device *dev)
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+ return dev_xdp_prog_id(dev, XDP_MODE_DRV);
}
static u32 rtnl_xdp_prog_hw(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
- XDP_QUERY_PROG_HW);
+ return dev_xdp_prog_id(dev, XDP_MODE_HW);
}
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 3c45f99e26d5..48aba933a5a8 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -400,15 +400,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
-int xdp_attachment_query(struct xdp_attachment_info *info,
- struct netdev_bpf *bpf)
-{
- bpf->prog_id = info->prog ? info->prog->aux->id : 0;
- bpf->prog_flags = info->prog ? info->flags : 0;
- return 0;
-}
-EXPORT_SYMBOL_GPL(xdp_attachment_query);
-
bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8913923a6c0..5084333b5ab6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2921,7 +2921,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
struct sock_common *sk_common, uid_t uid)
-static int bpf_iter_init_tcp(void *priv_data)
+static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
{
struct tcp_iter_state *st = priv_data;
struct tcp_seq_afinfo *afinfo;
@@ -2933,7 +2933,7 @@ static int bpf_iter_init_tcp(void *priv_data)
afinfo->family = AF_UNSPEC;
st->bpf_seq_afinfo = afinfo;
- ret = bpf_iter_init_seq_net(priv_data);
+ ret = bpf_iter_init_seq_net(priv_data, aux);
if (ret)
kfree(afinfo);
return ret;
@@ -2947,17 +2947,21 @@ static void bpf_iter_fini_tcp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
-static struct bpf_iter_reg tcp_reg_info = {
- .target = "tcp",
+static const struct bpf_iter_seq_info tcp_seq_info = {
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
.fini_seq_private = bpf_iter_fini_tcp,
.seq_priv_size = sizeof(struct tcp_iter_state),
+};
+
+static struct bpf_iter_reg tcp_reg_info = {
+ .target = "tcp",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &tcp_seq_info,
};
static void __init bpf_iter_register(void)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0fb5e4ea133f..e88efba07551 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -473,7 +473,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net,
return sk;
reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk && !reuseport_has_conns(sk, false))
+ if (reuse_sk)
sk = reuse_sk;
return sk;
}
@@ -3181,7 +3181,7 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
struct udp_sock *udp_sk, uid_t uid, int bucket)
-static int bpf_iter_init_udp(void *priv_data)
+static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
{
struct udp_iter_state *st = priv_data;
struct udp_seq_afinfo *afinfo;
@@ -3194,7 +3194,7 @@ static int bpf_iter_init_udp(void *priv_data)
afinfo->family = AF_UNSPEC;
afinfo->udp_table = &udp_table;
st->bpf_seq_afinfo = afinfo;
- ret = bpf_iter_init_seq_net(priv_data);
+ ret = bpf_iter_init_seq_net(priv_data, aux);
if (ret)
kfree(afinfo);
return ret;
@@ -3208,17 +3208,21 @@ static void bpf_iter_fini_udp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
-static struct bpf_iter_reg udp_reg_info = {
- .target = "udp",
+static const struct bpf_iter_seq_info udp_seq_info = {
.seq_ops = &bpf_iter_udp_seq_ops,
.init_seq_private = bpf_iter_init_udp,
.fini_seq_private = bpf_iter_fini_udp,
.seq_priv_size = sizeof(struct udp_iter_state),
+};
+
+static struct bpf_iter_reg udp_reg_info = {
+ .target = "udp",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__udp, udp_sk),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &udp_seq_info,
};
static void __init bpf_iter_register(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48d499d763fa..5e7e25e2523a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6427,17 +6427,21 @@ DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *r
BTF_ID_LIST(btf_fib6_info_id)
BTF_ID(struct, fib6_info)
-static struct bpf_iter_reg ipv6_route_reg_info = {
- .target = "ipv6_route",
+static const struct bpf_iter_seq_info ipv6_route_seq_info = {
.seq_ops = &ipv6_route_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
.fini_seq_private = bpf_iter_fini_seq_net,
.seq_priv_size = sizeof(struct ipv6_route_iter),
+};
+
+static struct bpf_iter_reg ipv6_route_reg_info = {
+ .target = "ipv6_route",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__ipv6_route, rt),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &ipv6_route_seq_info,
};
static int __init bpf_iter_register(void)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5530c9dcb61c..29d9691359b9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -155,9 +155,6 @@ static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- /* Fall back to scoring if group has connections */
- if (reuseport_has_conns(sk, false))
- return NULL;
}
return reuse_sk;
}
@@ -211,7 +208,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net,
return sk;
reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk && !reuseport_has_conns(sk, false))
+ if (reuse_sk)
sk = reuse_sk;
return sk;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d8921b833744..b5f30d7d30d0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2807,17 +2807,21 @@ static const struct rhashtable_params netlink_rhashtable_params = {
BTF_ID_LIST(btf_netlink_sock_id)
BTF_ID(struct, netlink_sock)
-static struct bpf_iter_reg netlink_reg_info = {
- .target = "netlink",
+static const struct bpf_iter_seq_info netlink_seq_info = {
.seq_ops = &netlink_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
.fini_seq_private = bpf_iter_fini_seq_net,
.seq_priv_size = sizeof(struct nl_seq_iter),
+};
+
+static struct bpf_iter_reg netlink_reg_info = {
+ .target = "netlink",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__netlink, sk),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &netlink_seq_info,
};
static int __init bpf_iter_register(void)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2e94a7e94671..c3231620d210 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -840,7 +840,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case XDP_STATISTICS:
{
- struct xdp_statistics stats;
+ struct xdp_statistics stats = {};
bool extra_stats = true;
size_t stats_size;
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 8dce698eab79..070ffacb42b5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -17,14 +17,15 @@ SYNOPSIS
ITER COMMANDS
===================
-| **bpftool** **iter pin** *OBJ* *PATH*
+| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*]
| **bpftool** **iter help**
|
| *OBJ* := /a/file/of/bpf_iter_target.o
+| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
DESCRIPTION
===========
- **bpftool iter pin** *OBJ* *PATH*
+ **bpftool iter pin** *OBJ* *PATH* [**map** *MAP*]
A bpf iterator combines a kernel iterating of
particular kernel data (e.g., tasks, bpf_maps, etc.)
and a bpf program called for each kernel data object
@@ -37,6 +38,12 @@ DESCRIPTION
character ('.'), which is reserved for future extensions
of *bpffs*.
+ A map element bpf iterator requires the additional parameter
+ *MAP* so that the bpf program can iterate over the elements of
+ that map. The bpf program then runs in the kernel on each map
+ element and can do checking, filtering, aggregation, etc.
+ without copying data to user space.
+
User can then *cat PATH* to see the bpf iterator output.
**bpftool iter help**
@@ -64,6 +71,13 @@ EXAMPLES
Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
to /sys/fs/bpf/my_netlink
+**# bpftool iter pin bpf_iter_hashmap.o /sys/fs/bpf/my_hashmap map id 20**
+
+::
+
+ Create a file-based bpf iterator from bpf_iter_hashmap.o and map with
+ id 20, and pin it to /sys/fs/bpf/my_hashmap
+
SEE ALSO
========
**bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 38b0949a185b..4a52e7a93339 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -21,6 +21,7 @@ LINK COMMANDS
| **bpftool** **link { show | list }** [*LINK*]
| **bpftool** **link pin** *LINK* *FILE*
+| **bpftool** **link detach** *LINK*
| **bpftool** **link help**
|
| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
@@ -49,6 +50,13 @@ DESCRIPTION
contain a dot character ('.'), which is reserved for future
extensions of *bpffs*.
+ **bpftool link detach** *LINK*
+ Force-detach link *LINK*. The BPF link and its underlying BPF
+ program will stay valid, but they will be detached from the
+ respective BPF hook, and the BPF link will transition into
+ a defunct state until the last open file descriptor for that
+ link is closed.
+
**bpftool link help**
Print short help message.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 51bd520ed437..8462690a039b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -59,6 +59,7 @@ endif
INSTALL ?= install
RM ?= rm -f
CLANG ?= clang
+LLVM_STRIP ?= llvm-strip
FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
@@ -147,7 +148,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_PATH) \
-I$(srctree)/tools/lib \
- -g -O2 -target bpf -c $< -o $@
+ -g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@
$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 7b137264ea3a..f53ed2f1a4aa 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -615,7 +615,23 @@ _bpftool()
iter)
case $command in
pin)
- _filedir
+ case $prev in
+ $command)
+ _filedir
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ _bpftool_get_map_names
+ ;;
+ pinned)
+ _filedir
+ ;;
+ *)
+ _bpftool_one_of_list $MAP_TYPE
+ ;;
+ esac
return 0
;;
*)
@@ -1106,7 +1122,7 @@ _bpftool()
;;
link)
case $command in
- show|list|pin)
+ show|list|pin|detach)
case $prev in
id)
_bpftool_get_link_ids
@@ -1123,7 +1139,7 @@ _bpftool()
COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
return 0
;;
- pin)
+ pin|detach)
if [[ $prev == "$command" ]]; then
COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
else
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fc9bc7a23db6..8ab142ff5eac 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -422,54 +422,6 @@ done:
return err;
}
-static struct btf *btf__parse_raw(const char *file)
-{
- struct btf *btf;
- struct stat st;
- __u8 *buf;
- FILE *f;
-
- if (stat(file, &st))
- return NULL;
-
- f = fopen(file, "rb");
- if (!f)
- return NULL;
-
- buf = malloc(st.st_size);
- if (!buf) {
- btf = ERR_PTR(-ENOMEM);
- goto exit_close;
- }
-
- if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
- btf = ERR_PTR(-EINVAL);
- goto exit_free;
- }
-
- btf = btf__new(buf, st.st_size);
-
-exit_free:
- free(buf);
-exit_close:
- fclose(f);
- return btf;
-}
-
-static bool is_btf_raw(const char *file)
-{
- __u16 magic = 0;
- int fd, nb_read;
-
- fd = open(file, O_RDONLY);
- if (fd < 0)
- return false;
-
- nb_read = read(fd, &magic, sizeof(magic));
- close(fd);
- return nb_read == sizeof(magic) && magic == BTF_MAGIC;
-}
-
static int do_dump(int argc, char **argv)
{
struct btf *btf = NULL;
@@ -547,11 +499,7 @@ static int do_dump(int argc, char **argv)
}
NEXT_ARG();
} else if (is_prefix(src, "file")) {
- if (is_btf_raw(*argv))
- btf = btf__parse_raw(*argv);
- else
- btf = btf__parse_elf(*argv, NULL);
-
+ btf = btf__parse(*argv, NULL);
if (IS_ERR(btf)) {
err = -PTR_ERR(btf);
btf = NULL;
@@ -596,7 +544,7 @@ static int do_dump(int argc, char **argv)
goto done;
}
if (!btf) {
- err = ENOENT;
+ err = -ENOENT;
p_err("can't find btf with ID (%u)", btf_id);
goto done;
}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 1cd75807673e..a43a6f10b564 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -504,6 +504,10 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
supported_types[prog_type] |= res;
+ if (!prog_type_name[prog_type]) {
+ p_info("program type name not found (type %d)", prog_type);
+ return;
+ }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(prog_type_name[prog_type]) > maxlen) {
p_info("program type name too long");
@@ -533,6 +537,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
* check required for unprivileged users
*/
+ if (!map_type_name[map_type]) {
+ p_info("map type name not found (type %d)", map_type);
+ return;
+ }
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(map_type_name[map_type]) > maxlen) {
p_info("map type name too long");
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 33240fcc6319..c9dba7543dba 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -2,6 +2,7 @@
// Copyright (C) 2020 Facebook
#define _GNU_SOURCE
+#include <unistd.h>
#include <linux/err.h>
#include <bpf/libbpf.h>
@@ -9,11 +10,12 @@
static int do_pin(int argc, char **argv)
{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
const char *objfile, *path;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link;
- int err;
+ int err = -1, map_fd = -1;
if (!REQ_ARGS(2))
usage();
@@ -21,10 +23,26 @@ static int do_pin(int argc, char **argv)
objfile = GET_ARG();
path = GET_ARG();
+ /* optional arguments */
+ if (argc) {
+ if (is_prefix(*argv, "map")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(2)) {
+ p_err("incorrect map spec");
+ return -1;
+ }
+
+ map_fd = map_parse_fd(&argc, &argv);
+ if (map_fd < 0)
+ return -1;
+ }
+ }
+
obj = bpf_object__open(objfile);
if (IS_ERR(obj)) {
p_err("can't open objfile %s", objfile);
- return -1;
+ goto close_map_fd;
}
err = bpf_object__load(obj);
@@ -39,7 +57,10 @@ static int do_pin(int argc, char **argv)
goto close_obj;
}
- link = bpf_program__attach_iter(prog, NULL);
+ if (map_fd >= 0)
+ iter_opts.map_fd = map_fd;
+
+ link = bpf_program__attach_iter(prog, &iter_opts);
if (IS_ERR(link)) {
err = PTR_ERR(link);
p_err("attach_iter failed for program %s",
@@ -62,14 +83,18 @@ close_link:
bpf_link__destroy(link);
close_obj:
bpf_object__close(obj);
+close_map_fd:
+ if (map_fd >= 0)
+ close(map_fd);
return err;
}
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %1$s %2$s pin OBJ PATH\n"
+ "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
" %1$s %2$s help\n"
+ " " HELP_SPEC_MAP "\n"
"",
bin_name, "iter");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 326b8fdf0243..1b793759170e 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -22,6 +22,8 @@ static const char * const link_type_name[] = {
static int link_parse_fd(int *argc, char ***argv)
{
+ int fd;
+
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@@ -35,7 +37,10 @@ static int link_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
- return bpf_link_get_fd_by_id(id);
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("failed to get link with ID %d: %s", id, strerror(errno));
+ return fd;
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -316,6 +321,34 @@ static int do_pin(int argc, char **argv)
return err;
}
+static int do_detach(int argc, char **argv)
+{
+ int err, fd;
+
+ if (argc != 2) {
+ p_err("link specifier is invalid or missing\n");
+ return 1;
+ }
+
+ fd = link_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return 1;
+
+ err = bpf_link_detach(fd);
+ if (err)
+ err = -errno;
+ close(fd);
+ if (err) {
+ p_err("failed link detach: %s", strerror(-err));
+ return 1;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -326,6 +359,7 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s { show | list } [LINK]\n"
" %1$s %2$s pin LINK FILE\n"
+ " %1$s %2$s detach LINK\n"
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
@@ -341,6 +375,7 @@ static const struct cmd cmds[] = {
{ "list", do_show },
{ "help", do_help },
{ "pin", do_pin },
+ { "detach", do_detach },
{ 0 }
};
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3e6ecc6332e2..158995d853b0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -59,6 +59,7 @@ const char * const prog_type_name[] = {
[BPF_PROG_TYPE_TRACING] = "tracing",
[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_LSM] = "lsm",
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
};
diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
new file mode 100644
index 000000000000..a026df7dc280
--- /dev/null
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -0,0 +1,4 @@
+/FEATURE-DUMP.libbpf
+/bpf_helper_defs.h
+/fixdep
+/resolve_btfids
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 6956b6350cad..52d883325a23 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -403,62 +403,6 @@ static int symbols_collect(struct object *obj)
return 0;
}
-static struct btf *btf__parse_raw(const char *file)
-{
- struct btf *btf;
- struct stat st;
- __u8 *buf;
- FILE *f;
-
- if (stat(file, &st))
- return NULL;
-
- f = fopen(file, "rb");
- if (!f)
- return NULL;
-
- buf = malloc(st.st_size);
- if (!buf) {
- btf = ERR_PTR(-ENOMEM);
- goto exit_close;
- }
-
- if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
- btf = ERR_PTR(-EINVAL);
- goto exit_free;
- }
-
- btf = btf__new(buf, st.st_size);
-
-exit_free:
- free(buf);
-exit_close:
- fclose(f);
- return btf;
-}
-
-static bool is_btf_raw(const char *file)
-{
- __u16 magic = 0;
- int fd, nb_read;
-
- fd = open(file, O_RDONLY);
- if (fd < 0)
- return false;
-
- nb_read = read(fd, &magic, sizeof(magic));
- close(fd);
- return nb_read == sizeof(magic) && magic == BTF_MAGIC;
-}
-
-static struct btf *btf_open(const char *path)
-{
- if (is_btf_raw(path))
- return btf__parse_raw(path);
- else
- return btf__parse_elf(path, NULL);
-}
-
static int symbols_resolve(struct object *obj)
{
int nr_typedefs = obj->nr_typedefs;
@@ -469,7 +413,7 @@ static int symbols_resolve(struct object *obj)
struct btf *btf;
__u32 nr;
- btf = btf_open(obj->btf ?: obj->path);
+ btf = btf__parse(obj->btf ?: obj->path, NULL);
err = libbpf_get_error(btf);
if (err) {
pr_err("FAILED: load BTF from %s: %s",
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 9ec01f4454f9..585486e40995 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -74,7 +74,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
# dependencies in the cmd file
if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \
@set -e; \
- $(echo-cmd) $(cmd_$(1)) && $(dep-cmd))
+ $(echo-cmd) $(cmd_$(1)); \
+ $(dep-cmd))
# if_changed - execute command if any prerequisite is newer than
# target, or command line has changed
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54d0c886e3ba..b134e679e9db 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_cmd {
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
+ BPF_LINK_DETACH,
};
enum bpf_map_type {
@@ -230,6 +231,7 @@ enum bpf_attach_type {
BPF_CGROUP_INET_SOCK_RELEASE,
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
+ BPF_XDP,
__MAX_BPF_ATTACH_TYPE
};
@@ -242,10 +244,18 @@ enum bpf_link_type {
BPF_LINK_TYPE_CGROUP = 3,
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
+ BPF_LINK_TYPE_XDP = 6,
MAX_BPF_LINK_TYPE,
};
+/*
+ * Kinds of extra target information for an iterator link.
+ * NOTE(review): the libbpf iterator attach path stores BPF_ITER_LINK_MAP_FD
+ * in link_create.flags together with a map FD in target_fd; the kernel-side
+ * interpretation is not visible here - confirm before relying on it.
+ */
+enum bpf_iter_link_info {
+ BPF_ITER_LINK_UNSPEC = 0,
+ BPF_ITER_LINK_MAP_FD = 1,
+
+ /* one past the last defined value */
+ MAX_BPF_ITER_LINK_INFO,
+};
+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -607,7 +617,10 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
- __u32 target_fd; /* object to attach to */
+ union {
+ __u32 target_fd; /* object to attach to */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
} link_create;
@@ -622,6 +635,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct {
+ __u32 link_fd;
+ } link_detach;
+
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
@@ -3229,7 +3246,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
@@ -4057,6 +4074,9 @@ struct bpf_link_info {
__u32 netns_ino;
__u32 attach_type;
} netns;
+ struct {
+ __u32 ifindex;
+ } xdp;
};
} __attribute__((aligned(8)));
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a7329b671c41..eab14c97c15d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -598,10 +598,21 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.prog_fd = prog_fd;
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
+ attr.link_create.flags = OPTS_GET(opts, flags, 0);
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
+/*
+ * Issue the BPF_LINK_DETACH command for the link referred to by link_fd.
+ * The return value is whatever sys_bpf() returns.
+ */
+int bpf_link_detach(int link_fd)
+{
+	union bpf_attr detach_attr;
+
+	/* clear the whole union so that only link_fd ends up non-zero */
+	memset(&detach_attr, 0, sizeof(detach_attr));
+	detach_attr.link_detach.link_fd = link_fd;
+
+	return sys_bpf(BPF_LINK_DETACH, &detach_attr, sizeof(detach_attr));
+}
+
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index dbef24ebcfcb..28855fd5b5f4 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -170,13 +170,16 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
};
-#define bpf_link_create_opts__last_field sz
+#define bpf_link_create_opts__last_field flags
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts);
+LIBBPF_API int bpf_link_detach(int link_fd);
+
struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 58eceb884df3..eebf020cbe3e 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -215,7 +215,7 @@ struct pt_regs;
#define PT_REGS_PARM5(x) ((x)->regs[8])
#define PT_REGS_RET(x) ((x)->regs[31])
#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_RC(x) ((x)->regs[2])
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
@@ -226,7 +226,7 @@ struct pt_regs;
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index c9e760e120dc..856b09a04563 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -386,7 +386,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
void btf__free(struct btf *btf)
{
- if (!btf)
+ if (IS_ERR_OR_NULL(btf))
return;
if (btf->fd >= 0)
@@ -562,6 +562,83 @@ done:
return btf;
}
+/*
+ * Read the whole file at path and parse it as raw BTF with btf__new().
+ *
+ * The first two bytes must equal BTF_MAGIC. Failures are returned as
+ * ERR_PTR() values:
+ *   -errno   fopen(), fseek() or ftell() failed
+ *   -EIO     the file could not be read in full
+ *   -EPROTO  the magic does not match, i.e. this is not raw BTF
+ *   -ENOMEM  the read buffer could not be allocated
+ * Otherwise the result of btf__new() is returned as is. The temporary read
+ * buffer is always freed before returning.
+ */
+struct btf *btf__parse_raw(const char *path)
+{
+	struct btf *btf = NULL;
+	void *buf = NULL;
+	__u16 magic;
+	int err = 0;
+	long size;
+	FILE *fp;
+
+	fp = fopen(path, "rb");
+	if (!fp) {
+		err = -errno;
+		goto done;
+	}
+
+	/* a raw BTF file starts with the BTF magic */
+	if (fread(&magic, 1, sizeof(magic), fp) < sizeof(magic)) {
+		err = -EIO;
+		goto done;
+	}
+	if (magic != BTF_MAGIC) {
+		/* certainly not raw BTF; callers may try another format */
+		err = -EPROTO;
+		goto done;
+	}
+
+	/* find the file size by seeking to the end ... */
+	if (fseek(fp, 0, SEEK_END)) {
+		err = -errno;
+		goto done;
+	}
+	size = ftell(fp);
+	if (size < 0) {
+		err = -errno;
+		goto done;
+	}
+	/* ... then go back to the beginning for the real read */
+	if (fseek(fp, 0, SEEK_SET)) {
+		err = -errno;
+		goto done;
+	}
+
+	/* slurp the complete file into one buffer */
+	buf = malloc(size);
+	if (!buf) {
+		err = -ENOMEM;
+		goto done;
+	}
+	if (fread(buf, 1, size, fp) < size) {
+		err = -EIO;
+		goto done;
+	}
+
+	/* hand the bytes to the in-memory parser */
+	btf = btf__new(buf, size);
+
+done:
+	free(buf);
+	if (fp)
+		fclose(fp);
+	if (err)
+		return ERR_PTR(err);
+	return btf;
+}
+
+/*
+ * Parse BTF from the file at path: the raw format is tried first, and the
+ * ELF parser is used only when the raw parser answers -EPROTO.
+ *
+ * If btf_ext is non-NULL, *btf_ext is reset to NULL up front; it is then
+ * only passed on to btf__parse_elf(). Any other raw-parser outcome (success
+ * or a different error) is returned unchanged.
+ */
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+	struct btf *raw;
+
+	if (btf_ext)
+		*btf_ext = NULL;
+
+	raw = btf__parse_raw(path);
+	/* -EPROTO is the one result that sends the file to the ELF parser */
+	if (IS_ERR(raw) && PTR_ERR(raw) == -EPROTO)
+		return btf__parse_elf(path, btf_ext);
+
+	return raw;
+}
+
static int compare_vsi_off(const void *_a, const void *_b)
{
const struct btf_var_secinfo *a = _a;
@@ -1025,7 +1102,7 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
void btf_ext__free(struct btf_ext *btf_ext)
{
- if (!btf_ext)
+ if (IS_ERR_OR_NULL(btf_ext))
return;
free(btf_ext->data);
free(btf_ext);
@@ -2951,41 +3028,6 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
return 0;
}
-static struct btf *btf_load_raw(const char *path)
-{
- struct btf *btf;
- size_t read_cnt;
- struct stat st;
- void *data;
- FILE *f;
-
- if (stat(path, &st))
- return ERR_PTR(-errno);
-
- data = malloc(st.st_size);
- if (!data)
- return ERR_PTR(-ENOMEM);
-
- f = fopen(path, "rb");
- if (!f) {
- btf = ERR_PTR(-errno);
- goto cleanup;
- }
-
- read_cnt = fread(data, 1, st.st_size, f);
- fclose(f);
- if (read_cnt < st.st_size) {
- btf = ERR_PTR(-EBADF);
- goto cleanup;
- }
-
- btf = btf__new(data, read_cnt);
-
-cleanup:
- free(data);
- return btf;
-}
-
/*
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
@@ -3021,7 +3063,7 @@ struct btf *libbpf_find_kernel_btf(void)
continue;
if (locations[i].raw_btf)
- btf = btf_load_raw(path);
+ btf = btf__parse_raw(path);
else
btf = btf__parse_elf(path, NULL);
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 491c7b41ffdc..f4a1a1d2b9a3 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -64,8 +64,9 @@ struct btf_ext_header {
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
-LIBBPF_API struct btf *btf__parse_elf(const char *path,
- struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_raw(const char *path);
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index e1c344504cae..cf711168d34a 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -183,7 +183,7 @@ void btf_dump__free(struct btf_dump *d)
{
int i, cnt;
- if (!d)
+ if (IS_ERR_OR_NULL(d))
return;
free(d->type_states);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 846164c79df1..7be04e45d29c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6504,7 +6504,7 @@ void bpf_object__close(struct bpf_object *obj)
{
size_t i;
- if (!obj)
+ if (IS_ERR_OR_NULL(obj))
return;
if (obj->clear_priv)
@@ -6915,7 +6915,8 @@ static const struct bpf_sec_def section_defs[] = {
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
BPF_XDP_CPUMAP),
- BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP,
+ BPF_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
@@ -7689,7 +7690,7 @@ int bpf_link__destroy(struct bpf_link *link)
{
int err = 0;
- if (!link)
+ if (IS_ERR_OR_NULL(link))
return 0;
if (!link->disconnected && link->detach)
@@ -7747,6 +7748,11 @@ struct bpf_link *bpf_link__open(const char *path)
return link;
}
+/*
+ * Detach the given link through its FD.
+ * Returns 0 when bpf_link_detach() returns 0, otherwise -errno.
+ */
+int bpf_link__detach(struct bpf_link *link)
+{
+	if (bpf_link_detach(link->fd))
+		return -errno;
+
+	return 0;
+}
+
int bpf_link__pin(struct bpf_link *link, const char *path)
{
int err;
@@ -7833,6 +7839,9 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
pr_warn("program '%s': failed to attach to pfd %d: %s\n",
bpf_program__title(prog, false), pfd,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+ pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ bpf_program__title(prog, false), pfd);
return ERR_PTR(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
@@ -8278,17 +8287,30 @@ bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
return bpf_program__attach_fd(prog, netns_fd, "netns");
}
+/*
+ * Attach prog as an "xdp" link by handing ifindex to
+ * bpf_program__attach_fd() in the position of the target FD; the result of
+ * that call is returned unchanged.
+ */
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+{
+ /* target_fd/target_ifindex use the same field in LINK_CREATE */
+ return bpf_program__attach_fd(prog, ifindex, "xdp");
+}
+
struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
const struct bpf_iter_attach_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int prog_fd, link_fd;
+ __u32 target_fd = 0;
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
return ERR_PTR(-EINVAL);
+ if (OPTS_HAS(opts, map_fd)) {
+ target_fd = opts->map_fd;
+ link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
+ }
+
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
pr_warn("program '%s': can't attach before loaded\n",
@@ -8301,7 +8323,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
return ERR_PTR(-ENOMEM);
link->detach = &bpf_link__detach_fd;
- link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
+ link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
+ &link_create_opts);
if (link_fd < 0) {
link_fd = -errno;
free(link);
@@ -8484,7 +8507,7 @@ void perf_buffer__free(struct perf_buffer *pb)
{
int i;
- if (!pb)
+ if (IS_ERR_OR_NULL(pb))
return;
if (pb->cpu_bufs) {
for (i = 0; i < pb->cpu_cnt; i++) {
@@ -9361,8 +9384,7 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
for (i = 0; i < s->prog_cnt; i++) {
struct bpf_link **link = s->progs[i].link;
- if (!IS_ERR_OR_NULL(*link))
- bpf_link__destroy(*link);
+ bpf_link__destroy(*link);
*link = NULL;
}
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c2272132e929..3ed1399bfbbc 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -229,6 +229,7 @@ LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
+LIBBPF_API int bpf_link__detach(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
@@ -257,6 +258,8 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
LIBBPF_API struct bpf_link *
bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
struct bpf_map;
@@ -264,8 +267,9 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
struct bpf_iter_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 map_fd;
};
-#define bpf_iter_attach_opts__last_field sz
+#define bpf_iter_attach_opts__last_field map_fd
LIBBPF_API struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 6f0856abe299..0c4722bfdd0a 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -273,6 +273,8 @@ LIBBPF_0.0.9 {
LIBBPF_0.1.0 {
global:
+ bpf_link__detach;
+ bpf_link_detach;
bpf_map__ifindex;
bpf_map__key_size;
bpf_map__map_flags;
@@ -286,9 +288,12 @@ LIBBPF_0.1.0 {
bpf_map__set_value_size;
bpf_map__type;
bpf_map__value_size;
+ bpf_program__attach_xdp;
bpf_program__autoload;
bpf_program__is_sk_lookup;
bpf_program__set_autoload;
bpf_program__set_sk_lookup;
+ btf__parse;
+ btf__parse_raw;
btf__set_fd;
} LIBBPF_0.0.9;
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0fb910df5387..033051717ba5 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -290,3 +290,26 @@ free_mem:
free(fhp);
return ret;
}
+
+/*
+ * Set up the cgroup test environment, create the cgroup at path and join it.
+ *
+ * Returns the FD obtained from create_and_get_cgroup() on success. On
+ * failure a message is written to stderr and a negative value is returned:
+ * -EINVAL when environment setup or joining fails, or the negative result of
+ * create_and_get_cgroup(). When creation or joining fails the environment is
+ * torn down again with cleanup_cgroup_environment().
+ */
+int cgroup_setup_and_join(const char *path)
+{
+	int cg_fd;
+
+	if (setup_cgroup_environment()) {
+		fprintf(stderr, "Failed to setup cgroup environment\n");
+		return -EINVAL;
+	}
+
+	cg_fd = create_and_get_cgroup(path);
+	if (cg_fd < 0) {
+		fprintf(stderr, "Failed to create test cgroup\n");
+		cleanup_cgroup_environment();
+		return cg_fd;
+	}
+
+	if (join_cgroup(path)) {
+		fprintf(stderr, "Failed to join cgroup\n");
+		/* the caller never receives cg_fd on this path, so close it here */
+		close(cg_fd);
+		cleanup_cgroup_environment();
+		return -EINVAL;
+	}
+
+	return cg_fd;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index d64bb8957090..5fe3d88e4f0d 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -9,6 +9,7 @@
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+int cgroup_setup_and_join(const char *path);
int create_and_get_cgroup(const char *path);
int join_cgroup(const char *path);
int setup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index e8da7b39158d..b8d6aef99db4 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -58,20 +58,10 @@ int main(int argc, char **argv)
int exit_code = 1;
char buf[256];
- err = setup_cgroup_environment();
- if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
- errno))
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
- cgroup_fd, errno))
- goto cleanup_cgroup_env;
-
- err = join_cgroup(TEST_CGROUP);
- if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
- goto cleanup_cgroup_env;
-
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index fed42755416d..4ffefdc1130f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -15,6 +15,13 @@
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
#include "bpf_iter_test_kern4.skel.h"
+#include "bpf_iter_bpf_hash_map.skel.h"
+#include "bpf_iter_bpf_percpu_hash_map.skel.h"
+#include "bpf_iter_bpf_array_map.skel.h"
+#include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_map.skel.h"
+#include "bpf_iter_test_kern5.skel.h"
+#include "bpf_iter_test_kern6.skel.h"
static int duration;
@@ -455,6 +462,440 @@ out:
bpf_iter_test_kern4__destroy(skel);
}
+/*
+ * Exercise the hash map iterator program: attaching it to hashmap2 and
+ * hashmap3 is expected to fail, while iterating hashmap1 must produce the
+ * key and value sums of the entries inserted below.
+ */
+static void test_bpf_hash_map(void)
+{
+	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_hash_map *skel;
+	int err, i, len, map_fd, iter_fd;
+	__u64 val, expected_val = 0;
+	struct bpf_link *link;
+	struct key_t {
+		int a;
+		int b;
+		int c;
+	} key;
+	char buf[64];
+
+	skel = bpf_iter_bpf_hash_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
+		  "skeleton open failed\n"))
+		return;
+
+	skel->bss->in_test_mode = true;
+
+	err = bpf_iter_bpf_hash_map__load(skel);
+	/* check the load result itself; skel is already known to be non-NULL */
+	if (CHECK(err, "bpf_iter_bpf_hash_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* iterator with hashmap2 and hashmap3 should fail */
+	opts.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter",
+		  "attach_iter for hashmap2 unexpected succeeded\n"))
+		goto out;
+
+	opts.map_fd = bpf_map__fd(skel->maps.hashmap3);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter",
+		  "attach_iter for hashmap3 unexpected succeeded\n"))
+		goto out;
+
+	/* hashmap1 should be good, update map values here */
+	map_fd = bpf_map__fd(skel->maps.hashmap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+		key.a = i + 1;
+		key.b = i + 2;
+		key.c = i + 3;
+		val = i + 4;
+		expected_key_a += key.a;
+		expected_key_b += key.b;
+		/* accumulated, but not compared against anything below */
+		expected_key_c += key.c;
+		expected_val += val;
+
+		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* compare the sums left in .bss with the expected ones */
+	if (CHECK(skel->bss->key_sum_a != expected_key_a,
+		  "key_sum_a", "got %u expected %u\n",
+		  skel->bss->key_sum_a, expected_key_a))
+		goto close_iter;
+	if (CHECK(skel->bss->key_sum_b != expected_key_b,
+		  "key_sum_b", "got %u expected %u\n",
+		  skel->bss->key_sum_b, expected_key_b))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %llu expected %llu\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_hash_map__destroy(skel);
+}
+
+/*
+ * Exercise the per-CPU hash map iterator program: fill hashmap1 with
+ * per-CPU values, iterate it, and compare the key and value sums left in
+ * .bss with the sums computed here.
+ */
+static void test_bpf_percpu_hash_map(void)
+{
+	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_percpu_hash_map *skel;
+	int err, i, j, len, map_fd, iter_fd;
+	__u32 expected_val = 0;
+	struct bpf_link *link;
+	struct key_t {
+		int a;
+		int b;
+		int c;
+	} key;
+	char buf[64];
+	void *val;
+
+	/* one 8-byte slot per possible CPU */
+	val = malloc(8 * bpf_num_possible_cpus());
+	if (CHECK(!val, "malloc", "failed to allocate per-CPU value buffer\n"))
+		return;
+
+	skel = bpf_iter_bpf_percpu_hash_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
+		  "skeleton open failed\n"))
+		goto free_val;
+
+	skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+	err = bpf_iter_bpf_percpu_hash_map__load(skel);
+	/* check the load result itself; skel is already known to be non-NULL */
+	if (CHECK(err, "bpf_iter_bpf_percpu_hash_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* update map values here */
+	map_fd = bpf_map__fd(skel->maps.hashmap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+		key.a = i + 1;
+		key.b = i + 2;
+		key.c = i + 3;
+		expected_key_a += key.a;
+		expected_key_b += key.b;
+		/* accumulated, but not compared against anything below */
+		expected_key_c += key.c;
+
+		for (j = 0; j < bpf_num_possible_cpus(); j++) {
+			/* only the first 4 bytes of each 8-byte slot are set */
+			*(__u32 *)(val + j * 8) = i + j;
+			expected_val += i + j;
+		}
+
+		err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* compare the sums left in .bss with the expected ones */
+	if (CHECK(skel->bss->key_sum_a != expected_key_a,
+		  "key_sum_a", "got %u expected %u\n",
+		  skel->bss->key_sum_a, expected_key_a))
+		goto close_iter;
+	if (CHECK(skel->bss->key_sum_b != expected_key_b,
+		  "key_sum_b", "got %u expected %u\n",
+		  skel->bss->key_sum_b, expected_key_b))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_percpu_hash_map__destroy(skel);
+free_val:
+	free(val);
+}
+
+/*
+ * Exercise the array map iterator program: fill arraymap1, iterate it, check
+ * the first key/value pair found in the iterator output and the sums left in
+ * .bss, then verify that every map slot afterwards holds its own index.
+ */
+static void test_bpf_array_map(void)
+{
+	__u64 val, expected_val = 0, res_first_val, first_val = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	__u32 expected_key = 0, res_first_key;
+	struct bpf_iter_bpf_array_map *skel;
+	int err, i, map_fd, iter_fd;
+	struct bpf_link *link;
+	char buf[64] = {};
+	int len, start;
+
+	skel = bpf_iter_bpf_array_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.arraymap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		val = i + 4;
+		expected_key += i;
+		expected_val += val;
+
+		if (i == 0)
+			first_val = val;
+
+		err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* collect the iterator output in buf, appending after each read */
+	start = 0;
+	while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
+		start += len;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/*
+	 * buf starts with a __u32 key followed directly by a __u64 value;
+	 * copy them out instead of dereferencing misaligned casts into buf.
+	 */
+	memcpy(&res_first_key, buf, sizeof(res_first_key));
+	memcpy(&res_first_val, buf + sizeof(res_first_key),
+	       sizeof(res_first_val));
+	if (CHECK(res_first_key != 0 || res_first_val != first_val,
+		  "bpf_seq_write",
+		  "seq_write failure: first key %u vs expected 0, "
+		  " first value %llu vs expected %llu\n",
+		  res_first_key, res_first_val, first_val))
+		goto close_iter;
+
+	if (CHECK(skel->bss->key_sum != expected_key,
+		  "key_sum", "got %u expected %u\n",
+		  skel->bss->key_sum, expected_key))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %llu expected %llu\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+	/* after the iteration each slot is expected to hold its own index */
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		err = bpf_map_lookup_elem(map_fd, &i, &val);
+		/* iter_fd and link are live here, so unwind through them */
+		if (CHECK(err, "map_lookup", "map_lookup failed\n"))
+			goto close_iter;
+		if (CHECK(i != val, "invalid_val",
+			  "got value %llu expected %u\n", val, i))
+			goto close_iter;
+	}
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_array_map__destroy(skel);
+}
+
+/*
+ * Exercise the per-CPU array map iterator program: fill arraymap1 with
+ * per-CPU values, iterate it, and compare the key and value sums left in
+ * .bss with the sums computed here.
+ */
+static void test_bpf_percpu_array_map(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_percpu_array_map *skel;
+	__u32 expected_key = 0, expected_val = 0;
+	int err, i, j, map_fd, iter_fd;
+	struct bpf_link *link;
+	char buf[64];
+	void *val;
+	int len;
+
+	/* one 8-byte slot per possible CPU */
+	val = malloc(8 * bpf_num_possible_cpus());
+	if (CHECK(!val, "malloc", "failed to allocate per-CPU value buffer\n"))
+		return;
+
+	skel = bpf_iter_bpf_percpu_array_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
+		  "skeleton open failed\n"))
+		goto free_val;
+
+	skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+	err = bpf_iter_bpf_percpu_array_map__load(skel);
+	/* check the load result itself; skel is already known to be non-NULL */
+	if (CHECK(err, "bpf_iter_bpf_percpu_array_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* update map values here */
+	map_fd = bpf_map__fd(skel->maps.arraymap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		expected_key += i;
+
+		for (j = 0; j < bpf_num_possible_cpus(); j++) {
+			/* only the first 4 bytes of each 8-byte slot are set */
+			*(__u32 *)(val + j * 8) = i + j;
+			expected_val += i + j;
+		}
+
+		err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* drain the iterator; the output itself is not inspected */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* compare the sums left in .bss with the expected ones */
+	if (CHECK(skel->bss->key_sum != expected_key,
+		  "key_sum", "got %u expected %u\n",
+		  skel->bss->key_sum, expected_key))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_percpu_array_map__destroy(skel);
+free_val:
+	free(val);
+}
+
+/*
+ * Exercise the sk_storage map iterator program: attach a storage value to
+ * each of three AF_INET6 stream sockets, iterate sk_stg_map, and compare the
+ * socket count and value sum left in .bss with what was stored here.
+ */
+static void test_bpf_sk_storage_map(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_sk_storage_map *skel;
+	int sock_fd[3] = {-1, -1, -1};
+	int storage_fd, iter_fd, num_sockets, nread, idx, err;
+	__u32 stg_val, sum_expected = 0;
+	struct bpf_link *link;
+	char scratch[64];
+
+	skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	storage_fd = bpf_map__fd(skel->maps.sk_stg_map);
+	num_sockets = ARRAY_SIZE(sock_fd);
+
+	/* socket number idx gets storage value idx + 1 */
+	for (idx = 0; idx < num_sockets; idx++) {
+		sock_fd[idx] = socket(AF_INET6, SOCK_STREAM, 0);
+		if (CHECK(sock_fd[idx] < 0, "socket", "errno: %d\n", errno))
+			goto out;
+
+		stg_val = idx + 1;
+		sum_expected += stg_val;
+
+		err = bpf_map_update_elem(storage_fd, &sock_fd[idx], &stg_val,
+					  BPF_NOEXIST);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = storage_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* read until EOF or error; the bytes themselves are discarded */
+	do {
+		nread = read(iter_fd, scratch, sizeof(scratch));
+	} while (nread > 0);
+	if (CHECK(nread < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* compare the counters left in .bss with the expected ones */
+	if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
+		  "ipv6_sk_count", "got %u expected %u\n",
+		  skel->bss->ipv6_sk_count, num_sockets))
+		goto close_iter;
+
+	if (CHECK(skel->bss->val_sum != sum_expected,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, sum_expected))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	/* slots that were never opened still hold -1 and are skipped */
+	for (idx = 0; idx < num_sockets; idx++) {
+		if (sock_fd[idx] < 0)
+			continue;
+		close(sock_fd[idx]);
+	}
+	bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
+/*
+ * Negative test: attaching the bpf_iter_test_kern5 program to hashmap1 is
+ * expected to fail. A link that was created anyway is destroyed again.
+ */
+static void test_rdonly_buf_out_of_bound(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_test_kern5 *skel;
+	struct bpf_link *link;
+	int attached;
+
+	skel = bpf_iter_test_kern5__open_and_load();
+	if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	opts.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+
+	/* a valid link here means the attach was wrongly accepted */
+	attached = CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n");
+	if (attached)
+		bpf_link__destroy(link);
+
+	bpf_iter_test_kern5__destroy(skel);
+}
+
+/*
+ * Negative test: opening and loading the bpf_iter_test_kern6 skeleton is
+ * expected to fail. A skeleton that was returned anyway is destroyed again.
+ */
+static void test_buf_neg_offset(void)
+{
+	struct bpf_iter_test_kern6 *skel;
+	int loaded;
+
+	skel = bpf_iter_test_kern6__open_and_load();
+
+	/* a non-NULL skeleton means the load was wrongly accepted */
+	loaded = CHECK(skel, "bpf_iter_test_kern6__open_and_load",
+		       "skeleton open_and_load unexpected success\n");
+	if (loaded)
+		bpf_iter_test_kern6__destroy(skel);
+}
+
void test_bpf_iter(void)
{
if (test__start_subtest("btf_id_or_null"))
@@ -491,4 +932,18 @@ void test_bpf_iter(void)
test_overflow(true, false);
if (test__start_subtest("prog-ret-1"))
test_overflow(false, true);
+ if (test__start_subtest("bpf_hash_map"))
+ test_bpf_hash_map();
+ if (test__start_subtest("bpf_percpu_hash_map"))
+ test_bpf_percpu_hash_map();
+ if (test__start_subtest("bpf_array_map"))
+ test_bpf_array_map();
+ if (test__start_subtest("bpf_percpu_array_map"))
+ test_bpf_percpu_array_map();
+ if (test__start_subtest("bpf_sk_storage_map"))
+ test_bpf_sk_storage_map();
+ if (test__start_subtest("rdonly-buf-out-of-bound"))
+ test_rdonly_buf_out_of_bound();
+ if (test__start_subtest("buf-neg-offset"))
+ test_buf_neg_offset();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
new file mode 100644
index 000000000000..643dfa35419c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+#include "cg_storage_multi_egress_only.skel.h"
+#include "cg_storage_multi_isolated.skel.h"
+#include "cg_storage_multi_shared.skel.h"
+
+#define PARENT_CGROUP "/cgroup_storage"
+#define CHILD_CGROUP "/cgroup_storage/child"
+
+static int duration;
+
+/* Look up @key in @map and compare the stored bytes against @expected.
+ * Returns true when the lookup fails or the contents differ (either case is
+ * reported through CHECK) and false when the storage matches.
+ */
+static bool assert_storage(struct bpf_map *map, const void *key,
+			   struct cgroup_value *expected)
+{
+	struct cgroup_value actual;
+	int fd = bpf_map__fd(map);
+	bool failed;
+
+	failed = CHECK(bpf_map_lookup_elem(fd, key, &actual) < 0,
+		       "map-lookup", "errno %d", errno);
+	if (!failed)
+		failed = CHECK(memcmp(&actual, expected, sizeof(actual)),
+			       "assert-storage", "storages differ");
+
+	return failed;
+}
+
+/* Assert that @map holds no value for @key.
+ * Returns true (after reporting through CHECK) when the lookup succeeds or
+ * fails with anything other than ENOENT, and false when the key is absent.
+ */
+static bool assert_storage_noexist(struct bpf_map *map, const void *key)
+{
+	struct cgroup_value value;
+	int map_fd, lookup_ret, lookup_errno;
+
+	map_fd = bpf_map__fd(map);
+
+	lookup_ret = bpf_map_lookup_elem(map_fd, key, &value);
+	/* Snapshot errno right away: the first CHECK below may log, and
+	 * anything that runs between the failing call and the errno test is
+	 * allowed to overwrite errno.
+	 */
+	lookup_errno = errno;
+
+	if (CHECK(lookup_ret == 0,
+		  "map-lookup", "succeeded, expected ENOENT"))
+		return true;
+	if (CHECK(lookup_errno != ENOENT,
+		  "map-lookup", "errno %d, expected ENOENT", lookup_errno))
+		return true;
+
+	return false;
+}
+
+/* Join @cgroup_path, start an AF_INET/SOCK_DGRAM server socket, connect a
+ * client socket to it and send one "message" datagram.
+ * Returns false on success and true on failure.  errno is preserved across
+ * the cleanup, because callers print it in their CHECK message.
+ */
+static bool connect_send(const char *cgroup_path)
+{
+	int server_fd = -1, client_fd = -1;
+	int saved_errno;
+	bool res = true;
+
+	if (join_cgroup(cgroup_path))
+		goto out_clean;
+
+	server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+	if (server_fd < 0)
+		goto out_clean;
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (client_fd < 0)
+		goto out_clean;
+
+	if (send(client_fd, "message", strlen("message"), 0) < 0)
+		goto out_clean;
+
+	res = false;
+
+out_clean:
+	/* Close only descriptors that were actually opened, and keep the
+	 * cleanup from replacing the errno of the step that failed
+	 * (close(-1) would set it to EBADF).
+	 */
+	saved_errno = errno;
+	if (client_fd >= 0)
+		close(client_fd);
+	if (server_fd >= 0)
+		close(server_fd);
+	errno = saved_errno;
+	return res;
+}
+
+/* Egress-only scenario: a single egress program is attached first to the
+ * parent cgroup alone and then to the child cgroup as well, with traffic
+ * always generated from the child cgroup.  After each round the invocation
+ * counter and the per-cgroup storage contents are checked.
+ */
+static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
+{
+	struct bpf_link *parent_link = NULL, *child_link = NULL;
+	struct cg_storage_multi_egress_only *skel;
+	struct bpf_cgroup_storage_key storage_key;
+	struct cgroup_value want;
+	struct bpf_map *storage;
+	bool send_failed;
+
+	skel = cg_storage_multi_egress_only__open_and_load();
+	if (CHECK(!skel, "skel-load", "errno %d", errno))
+		return;
+	storage = skel->maps.cgroup_storage;
+
+	/* Every lookup in this test uses the egress attach type. */
+	storage_key.attach_type = BPF_CGROUP_INET_EGRESS;
+
+	/* Round 1: program on the parent only, packet sent from the child.
+	 * Expect exactly one run, accounted in the parent cgroup's storage,
+	 * and no storage at all for the child cgroup.
+	 */
+	parent_link = bpf_program__attach_cgroup(skel->progs.egress,
+						 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
+		  "err %ld", PTR_ERR(parent_link)))
+		goto out;
+	send_failed = connect_send(CHILD_CGROUP);
+	if (CHECK(send_failed, "first-connect-send", "errno %d", errno))
+		goto out;
+	if (CHECK(skel->bss->invocations != 1,
+		  "first-invoke", "invocations=%d", skel->bss->invocations))
+		goto out;
+
+	storage_key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+	want = (struct cgroup_value) { .egress_pkts = 1 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+
+	storage_key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	if (assert_storage_noexist(storage, &storage_key))
+		goto out;
+
+	/* Round 2: program on both parent and child, packet sent from the
+	 * child.  Expect two additional runs: one accounted in the parent's
+	 * storage and one in the child's storage.
+	 */
+	child_link = bpf_program__attach_cgroup(skel->progs.egress,
+						child_cgroup_fd);
+	if (CHECK(IS_ERR(child_link), "child-cg-attach",
+		  "err %ld", PTR_ERR(child_link)))
+		goto out;
+	send_failed = connect_send(CHILD_CGROUP);
+	if (CHECK(send_failed, "second-connect-send", "errno %d", errno))
+		goto out;
+	if (CHECK(skel->bss->invocations != 3,
+		  "second-invoke", "invocations=%d", skel->bss->invocations))
+		goto out;
+
+	storage_key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+	want = (struct cgroup_value) { .egress_pkts = 2 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+
+	/* Last assertion: its result is only reported, cleanup follows
+	 * either way.
+	 */
+	storage_key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	want = (struct cgroup_value) { .egress_pkts = 1 };
+	assert_storage(storage, &storage_key, &want);
+
+out:
+	/* Links are either NULL (never attached), an error pointer (attach
+	 * failed) or valid; error pointers must not be destroyed.
+	 */
+	if (!IS_ERR(parent_link))
+		bpf_link__destroy(parent_link);
+	if (!IS_ERR(child_link))
+		bpf_link__destroy(child_link);
+
+	cg_storage_multi_egress_only__destroy(skel);
+}
+
+/* Isolated-storage scenario: two egress programs and one ingress program
+ * are attached first to the parent cgroup alone and then to the child
+ * cgroup as well, with traffic always generated from the child cgroup.
+ * Storage is looked up by (cgroup id, attach type), so egress and ingress
+ * counters are expected to live in separate storages.
+ */
+static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
+{
+	struct bpf_link *parent_egress1 = NULL, *parent_egress2 = NULL;
+	struct bpf_link *parent_ingress = NULL;
+	struct bpf_link *child_egress1 = NULL, *child_egress2 = NULL;
+	struct bpf_link *child_ingress = NULL;
+	struct bpf_cgroup_storage_key storage_key;
+	struct cg_storage_multi_isolated *skel;
+	struct cgroup_value want;
+	struct bpf_map *storage;
+	bool send_failed;
+
+	skel = cg_storage_multi_isolated__open_and_load();
+	if (CHECK(!skel, "skel-load", "errno %d", errno))
+		return;
+	storage = skel->maps.cgroup_storage;
+
+	/* Round 1: programs on the parent only, packet sent from the child.
+	 * Expect three runs (two parent egress, one parent ingress), counted
+	 * in separate parent storages, and no storages for the child.
+	 */
+	parent_egress1 = bpf_program__attach_cgroup(skel->progs.egress1,
+						    parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress1), "parent-egress1-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress1)))
+		goto out;
+	parent_egress2 = bpf_program__attach_cgroup(skel->progs.egress2,
+						    parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress2), "parent-egress2-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress2)))
+		goto out;
+	parent_ingress = bpf_program__attach_cgroup(skel->progs.ingress,
+						    parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_ingress), "parent-ingress-cg-attach",
+		  "err %ld", PTR_ERR(parent_ingress)))
+		goto out;
+	send_failed = connect_send(CHILD_CGROUP);
+	if (CHECK(send_failed, "first-connect-send", "errno %d", errno))
+		goto out;
+	if (CHECK(skel->bss->invocations != 3,
+		  "first-invoke", "invocations=%d", skel->bss->invocations))
+		goto out;
+
+	/* Parent: egress and ingress storages. */
+	storage_key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+	storage_key.attach_type = BPF_CGROUP_INET_EGRESS;
+	want = (struct cgroup_value) { .egress_pkts = 2 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+	storage_key.attach_type = BPF_CGROUP_INET_INGRESS;
+	want = (struct cgroup_value) { .ingress_pkts = 1 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+
+	/* Child: neither storage may exist yet. */
+	storage_key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	storage_key.attach_type = BPF_CGROUP_INET_EGRESS;
+	if (assert_storage_noexist(storage, &storage_key))
+		goto out;
+	storage_key.attach_type = BPF_CGROUP_INET_INGRESS;
+	if (assert_storage_noexist(storage, &storage_key))
+		goto out;
+
+	/* Round 2: programs on both parent and child, packet sent from the
+	 * child.  Expect six additional runs: egress twice and ingress once
+	 * for each of the two cgroups, with egress and ingress storages
+	 * still kept separate.
+	 */
+	child_egress1 = bpf_program__attach_cgroup(skel->progs.egress1,
+						   child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress1), "child-egress1-cg-attach",
+		  "err %ld", PTR_ERR(child_egress1)))
+		goto out;
+	child_egress2 = bpf_program__attach_cgroup(skel->progs.egress2,
+						   child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress2), "child-egress2-cg-attach",
+		  "err %ld", PTR_ERR(child_egress2)))
+		goto out;
+	child_ingress = bpf_program__attach_cgroup(skel->progs.ingress,
+						   child_cgroup_fd);
+	if (CHECK(IS_ERR(child_ingress), "child-ingress-cg-attach",
+		  "err %ld", PTR_ERR(child_ingress)))
+		goto out;
+	send_failed = connect_send(CHILD_CGROUP);
+	if (CHECK(send_failed, "second-connect-send", "errno %d", errno))
+		goto out;
+	if (CHECK(skel->bss->invocations != 9,
+		  "second-invoke", "invocations=%d", skel->bss->invocations))
+		goto out;
+
+	/* Parent: counters accumulated over both rounds. */
+	storage_key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+	storage_key.attach_type = BPF_CGROUP_INET_EGRESS;
+	want = (struct cgroup_value) { .egress_pkts = 4 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+	storage_key.attach_type = BPF_CGROUP_INET_INGRESS;
+	want = (struct cgroup_value) { .ingress_pkts = 2 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+
+	/* Child: counters from the second round only. */
+	storage_key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	storage_key.attach_type = BPF_CGROUP_INET_EGRESS;
+	want = (struct cgroup_value) { .egress_pkts = 2 };
+	if (assert_storage(storage, &storage_key, &want))
+		goto out;
+	/* Last assertion: its result is only reported, cleanup follows
+	 * either way.
+	 */
+	storage_key.attach_type = BPF_CGROUP_INET_INGRESS;
+	want = (struct cgroup_value) { .ingress_pkts = 1 };
+	assert_storage(storage, &storage_key, &want);
+
+out:
+	/* Links are either NULL (never attached), an error pointer (attach
+	 * failed) or valid; error pointers must not be destroyed.
+	 */
+	if (!IS_ERR(parent_egress1))
+		bpf_link__destroy(parent_egress1);
+	if (!IS_ERR(parent_egress2))
+		bpf_link__destroy(parent_egress2);
+	if (!IS_ERR(parent_ingress))
+		bpf_link__destroy(parent_ingress);
+	if (!IS_ERR(child_egress1))
+		bpf_link__destroy(child_egress1);
+	if (!IS_ERR(child_egress2))
+		bpf_link__destroy(child_egress2);
+	if (!IS_ERR(child_ingress))
+		bpf_link__destroy(child_ingress);
+
+	cg_storage_multi_isolated__destroy(skel);
+}
+
+/* Shared-storage scenario: two egress programs and one ingress program are
+ * attached first to the parent cgroup alone and then to the child cgroup as
+ * well, with traffic always generated from the child cgroup.  Storage is
+ * looked up by cgroup id alone, so egress and ingress counters of a cgroup
+ * are expected in the same value.
+ */
+static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
+{
+	struct bpf_link *parent_egress1 = NULL, *parent_egress2 = NULL;
+	struct bpf_link *parent_ingress = NULL;
+	struct bpf_link *child_egress1 = NULL, *child_egress2 = NULL;
+	struct bpf_link *child_ingress = NULL;
+	struct cg_storage_multi_shared *skel;
+	struct cgroup_value want;
+	struct bpf_map *storage;
+	bool send_failed;
+	__u64 cgroup_id;
+
+	skel = cg_storage_multi_shared__open_and_load();
+	if (CHECK(!skel, "skel-load", "errno %d", errno))
+		return;
+	storage = skel->maps.cgroup_storage;
+
+	/* Round 1: programs on the parent only, packet sent from the child.
+	 * Expect three runs (two parent egress, one parent ingress) and no
+	 * storage for the child cgroup.
+	 */
+	parent_egress1 = bpf_program__attach_cgroup(skel->progs.egress1,
+						    parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress1), "parent-egress1-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress1)))
+		goto out;
+	parent_egress2 = bpf_program__attach_cgroup(skel->progs.egress2,
+						    parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress2), "parent-egress2-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress2)))
+		goto out;
+	parent_ingress = bpf_program__attach_cgroup(skel->progs.ingress,
+						    parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_ingress), "parent-ingress-cg-attach",
+		  "err %ld", PTR_ERR(parent_ingress)))
+		goto out;
+	send_failed = connect_send(CHILD_CGROUP);
+	if (CHECK(send_failed, "first-connect-send", "errno %d", errno))
+		goto out;
+	if (CHECK(skel->bss->invocations != 3,
+		  "first-invoke", "invocations=%d", skel->bss->invocations))
+		goto out;
+
+	cgroup_id = get_cgroup_id(PARENT_CGROUP);
+	want = (struct cgroup_value) {
+		.egress_pkts = 2,
+		.ingress_pkts = 1,
+	};
+	if (assert_storage(storage, &cgroup_id, &want))
+		goto out;
+
+	cgroup_id = get_cgroup_id(CHILD_CGROUP);
+	if (assert_storage_noexist(storage, &cgroup_id))
+		goto out;
+
+	/* Round 2: programs on both parent and child, packet sent from the
+	 * child.  Expect six additional runs: egress twice and ingress once
+	 * for each of the two cgroups.
+	 */
+	child_egress1 = bpf_program__attach_cgroup(skel->progs.egress1,
+						   child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress1), "child-egress1-cg-attach",
+		  "err %ld", PTR_ERR(child_egress1)))
+		goto out;
+	child_egress2 = bpf_program__attach_cgroup(skel->progs.egress2,
+						   child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress2), "child-egress2-cg-attach",
+		  "err %ld", PTR_ERR(child_egress2)))
+		goto out;
+	child_ingress = bpf_program__attach_cgroup(skel->progs.ingress,
+						   child_cgroup_fd);
+	if (CHECK(IS_ERR(child_ingress), "child-ingress-cg-attach",
+		  "err %ld", PTR_ERR(child_ingress)))
+		goto out;
+	send_failed = connect_send(CHILD_CGROUP);
+	if (CHECK(send_failed, "second-connect-send", "errno %d", errno))
+		goto out;
+	if (CHECK(skel->bss->invocations != 9,
+		  "second-invoke", "invocations=%d", skel->bss->invocations))
+		goto out;
+
+	/* Parent: counters accumulated over both rounds. */
+	cgroup_id = get_cgroup_id(PARENT_CGROUP);
+	want = (struct cgroup_value) {
+		.egress_pkts = 4,
+		.ingress_pkts = 2,
+	};
+	if (assert_storage(storage, &cgroup_id, &want))
+		goto out;
+
+	/* Child: counters from the second round only.  Last assertion: its
+	 * result is only reported, cleanup follows either way.
+	 */
+	cgroup_id = get_cgroup_id(CHILD_CGROUP);
+	want = (struct cgroup_value) {
+		.egress_pkts = 2,
+		.ingress_pkts = 1,
+	};
+	assert_storage(storage, &cgroup_id, &want);
+
+out:
+	/* Links are either NULL (never attached), an error pointer (attach
+	 * failed) or valid; error pointers must not be destroyed.
+	 */
+	if (!IS_ERR(parent_egress1))
+		bpf_link__destroy(parent_egress1);
+	if (!IS_ERR(parent_egress2))
+		bpf_link__destroy(parent_egress2);
+	if (!IS_ERR(parent_ingress))
+		bpf_link__destroy(parent_ingress);
+	if (!IS_ERR(child_egress1))
+		bpf_link__destroy(child_egress1);
+	if (!IS_ERR(child_egress2))
+		bpf_link__destroy(child_egress2);
+	if (!IS_ERR(child_ingress))
+		bpf_link__destroy(child_ingress);
+
+	cg_storage_multi_shared__destroy(skel);
+}
+
+/* Test entry point: join the parent cgroup, create the child cgroup, then
+ * run each selected subtest with both cgroup descriptors.
+ */
+void test_cg_storage_multi(void)
+{
+	/* Subtest name and the scenario it runs, in execution order. */
+	static const struct {
+		const char *name;
+		void (*run)(int parent_cgroup_fd, int child_cgroup_fd);
+	} subtests[] = {
+		{ "egress_only", test_egress_only },
+		{ "isolated", test_isolated },
+		{ "shared", test_shared },
+	};
+	int parent_fd, child_fd = -1;
+	size_t i;
+
+	parent_fd = test__join_cgroup(PARENT_CGROUP);
+	if (CHECK(parent_fd < 0, "cg-create-parent", "errno %d", errno))
+		goto out;
+	child_fd = create_and_get_cgroup(CHILD_CGROUP);
+	if (CHECK(child_fd < 0, "cg-create-child", "errno %d", errno))
+		goto out;
+
+	for (i = 0; i < sizeof(subtests) / sizeof(subtests[0]); i++) {
+		if (test__start_subtest(subtests[i].name))
+			subtests[i].run(parent_fd, child_fd);
+	}
+
+out:
+	close(child_fd);
+	close(parent_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 6e04f8d1d15b..4d9b514b3fd9 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "test_cgroup_link.skel.h"
static __u32 duration = 0;
@@ -37,7 +38,8 @@ void test_cgroup_link(void)
int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
- __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id;
+ struct bpf_link_info info;
int i = 0, err, prog_fd;
bool detach_legacy = false;
@@ -219,6 +221,22 @@ void test_cgroup_link(void)
/* BPF programs should still get called */
ping_and_check(0, cg_nr);
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ err = bpf_link__detach(links[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ /* cgroup_id should be zero in link_info */
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ /* First BPF program shouldn't be called anymore */
+ ping_and_check(0, cg_nr - 1);
+
/* leave cgroup and remove them, don't detach programs */
cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
index 78e30d3a23d5..6acb0e94d4d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_retro.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -6,7 +6,7 @@
void test_core_retro(void)
{
- int err, zero = 0, res, duration = 0;
+ int err, zero = 0, res, duration = 0, my_pid = getpid();
struct test_core_retro *skel;
/* load program */
@@ -14,6 +14,10 @@ void test_core_retro(void)
if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
goto out_close;
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
+ if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+ goto out_close;
+
/* attach probe */
err = test_core_retro__attach(skel);
if (CHECK(err, "attach_kprobe", "err %d\n", err))
@@ -26,7 +30,7 @@ void test_core_retro(void)
if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
goto out_close;
- CHECK(res != getpid(), "pid_check", "got %d != exp %d\n", res, getpid());
+ CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
out_close:
test_core_retro__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
new file mode 100644
index 000000000000..d884b2ed5bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
+
+/* Drop the result of one attach attempt stored in @link.
+ * A failed attach leaves @pmu_fd with the caller, so it is closed here.  A
+ * live link has taken over @pmu_fd (libbpf closes it when the link is
+ * destroyed), so the link is destroyed instead of closing the fd behind its
+ * back.  @link is reset to NULL so nothing stale is destroyed later.
+ */
+static void release_attach_attempt(struct bpf_link **link, int pmu_fd)
+{
+	if (!*link || IS_ERR(*link))
+		close(pmu_fd);
+	else
+		bpf_link__destroy(*link);
+	*link = NULL;
+}
+
+/* Check which perf_event configurations the oncpu program may attach to:
+ *   1. no PERF_SAMPLE_CALLCHAIN           -> attach must fail
+ *   2. PERF_SAMPLE_CALLCHAIN added        -> attach must succeed
+ *   3. exclude_callchain_kernel also set  -> attach must fail
+ * The test is skipped when the hardware cycles event with precise_ip cannot
+ * be opened (ENOENT or EOPNOTSUPP).
+ */
+void test_get_stackid_cannot_attach(void)
+{
+	struct perf_event_attr attr = {
+		/* .type = PERF_TYPE_SOFTWARE, */
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.precise_ip = 1,
+		.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
+		.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+				      PERF_SAMPLE_BRANCH_NO_FLAGS |
+				      PERF_SAMPLE_BRANCH_NO_CYCLES |
+				      PERF_SAMPLE_BRANCH_CALL_STACK,
+		.sample_period = 5000,
+		.size = sizeof(struct perf_event_attr),
+	};
+	struct test_stacktrace_build_id *skel;
+	__u32 duration = 0;
+	int pmu_fd, err;
+
+	skel = test_stacktrace_build_id__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+		return;
+
+	/* override program type */
+	bpf_program__set_perf_event(skel->progs.oncpu);
+
+	err = test_stacktrace_build_id__load(skel);
+	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+		goto cleanup;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+		printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n",
+		       __func__);
+		test__skip();
+		goto cleanup;
+	}
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto cleanup;
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
+	      "should have failed\n");
+	release_attach_attempt(&skel->links.oncpu, pmu_fd);
+
+	/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
+	attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto cleanup;
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
+	      "err: %ld\n", PTR_ERR(skel->links.oncpu));
+	/* The attach is expected to succeed here, so this normally destroys
+	 * the link (and with it the fd) before the slot is reused below.
+	 */
+	release_attach_attempt(&skel->links.oncpu, pmu_fd);
+
+	/* add exclude_callchain_kernel, attach should fail */
+	attr.exclude_callchain_kernel = 1;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto cleanup;
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
+	      "should have failed\n");
+	release_attach_attempt(&skel->links.oncpu, pmu_fd);
+
+cleanup:
+	test_stacktrace_build_id__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
new file mode 100644
index 000000000000..72c3690844fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "perf_event_stackmap.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/* Innermost function of the func_6() call chain: bumps a counter that
+ * persists across calls, sleeps for 100 microseconds and returns the new
+ * counter value.
+ */
+noinline int func_1(void)
+{
+	static int counter = 1;
+
+	counter++;
+	usleep(100);
+
+	return counter;
+}
+
+/* Call-chain link: forwards to func_1() and returns its result. */
+noinline int func_2(void)
+{
+	int ret = func_1();
+
+	return ret;
+}
+
+/* Call-chain link: forwards to func_2() and returns its result. */
+noinline int func_3(void)
+{
+	int ret = func_2();
+
+	return ret;
+}
+
+/* Call-chain link: forwards to func_3() and returns its result. */
+noinline int func_4(void)
+{
+	int ret = func_3();
+
+	return ret;
+}
+
+/* Call-chain link: forwards to func_4() and returns its result. */
+noinline int func_5(void)
+{
+	int ret = func_4();
+
+	return ret;
+}
+
+/* Outermost function of the call chain: calls func_5() 100 times and
+ * returns 1 plus the sum of the returned values.
+ */
+noinline int func_6(void)
+{
+	int remaining = 100;
+	int total = 1;
+
+	while (remaining-- > 0)
+		total += func_5();
+
+	return total;
+}
+
+/* Attach the oncpu program to a hardware cycles perf event opened on CPU 0,
+ * run the func_6() call chain on that CPU, then check the four results the
+ * program stores in the skeleton's data section.  The test is skipped when
+ * the perf event cannot be opened.
+ */
+void test_perf_event_stackmap(void)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.precise_ip = 2,
+		.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
+			       PERF_SAMPLE_CALLCHAIN,
+		.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+				      PERF_SAMPLE_BRANCH_NO_FLAGS |
+				      PERF_SAMPLE_BRANCH_NO_CYCLES |
+				      PERF_SAMPLE_BRANCH_CALL_STACK,
+		.sample_period = 5000,
+		.size = sizeof(struct perf_event_attr),
+	};
+	struct perf_event_stackmap *skel;
+	struct bpf_link *oncpu_link;
+	cpu_set_t cpu0_only;
+	__u32 duration = 0;
+	int pmu_fd, err;
+
+	skel = perf_event_stackmap__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+		return;
+
+	err = perf_event_stackmap__load(skel);
+	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+		goto cleanup;
+
+	/* Pin this thread to CPU 0, the CPU the perf event is opened on. */
+	CPU_ZERO(&cpu0_only);
+	CPU_SET(0, &cpu0_only);
+	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu0_only),
+				     &cpu0_only);
+	if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
+		goto cleanup;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (pmu_fd < 0) {
+		printf("%s:SKIP:cpu doesn't support the event\n", __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	oncpu_link = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd);
+	skel->links.oncpu = oncpu_link;
+	if (CHECK(IS_ERR(oncpu_link), "attach_perf_event",
+		  "err %ld\n", PTR_ERR(oncpu_link))) {
+		/* Attach failed, so the fd is still ours to close. */
+		close(pmu_fd);
+		goto cleanup;
+	}
+
+	/* create kernel and user stack traces for testing */
+	func_6();
+
+	CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
+	CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
+	CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
+	CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
+
+cleanup:
+	perf_event_stackmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 713167449c98..8b571890c57e 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -35,7 +35,7 @@ static struct sec_name_test tests[] = {
{-EINVAL, 0},
},
{"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
- {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+ {"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} },
{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index f1784ae4565a..c571584c00f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -34,6 +34,7 @@
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
+#include "testing_helpers.h"
#include "test_sk_lookup.skel.h"
/* External (address, port) pairs the client sends packets to. */
@@ -74,6 +75,7 @@ struct test {
struct inet_addr connect_to;
struct inet_addr listen_at;
enum server accept_on;
+ bool reuseport_has_conns; /* Add a connected socket to reuseport group */
};
static __u32 duration; /* for CHECK macro */
@@ -468,34 +470,10 @@ static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
return 0;
}
-static __u32 link_info_prog_id(struct bpf_link *link)
-{
- struct bpf_link_info info = {};
- __u32 info_len = sizeof(info);
- int link_fd, err;
-
- link_fd = bpf_link__fd(link);
- if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) {
- errno = -link_fd;
- log_err("bpf_link__fd failed");
- return 0;
- }
-
- err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
- if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) {
- log_err("bpf_obj_get_info_by_fd");
- return 0;
- }
- if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd",
- "unexpected info len %u\n", info_len))
- return 0;
-
- return info.prog_id;
-}
-
static void query_lookup_prog(struct test_sk_lookup *skel)
{
struct bpf_link *link[3] = {};
+ struct bpf_link_info info;
__u32 attach_flags = 0;
__u32 prog_ids[3] = {};
__u32 prog_cnt = 3;
@@ -533,18 +511,36 @@ static void query_lookup_prog(struct test_sk_lookup *skel)
if (CHECK(prog_cnt != 3, "bpf_prog_query",
"wrong program count on query: %u", prog_cnt))
goto detach;
- prog_id = link_info_prog_id(link[0]);
+ prog_id = link_info_prog_id(link[0], &info);
CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
"invalid program #0 id on query: %u != %u\n",
prog_ids[0], prog_id);
- prog_id = link_info_prog_id(link[1]);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[1], &info);
CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
"invalid program #1 id on query: %u != %u\n",
prog_ids[1], prog_id);
- prog_id = link_info_prog_id(link[2]);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[2], &info);
CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
"invalid program #2 id on query: %u != %u\n",
prog_ids[2], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+ err = bpf_link__detach(link[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto detach;
+
+ /* prog id is still there, but netns_ino is zeroed out */
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino != 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
detach:
if (link[2])
@@ -559,7 +555,8 @@ close:
static void run_lookup_prog(const struct test *t)
{
- int client_fd, server_fds[MAX_SERVERS] = { -1 };
+ int server_fds[MAX_SERVERS] = { -1 };
+ int client_fd, reuse_conn_fd = -1;
struct bpf_link *lookup_link;
int i, err;
@@ -583,6 +580,32 @@ static void run_lookup_prog(const struct test *t)
break;
}
+ /* Regular UDP socket lookup with reuseport behaves
+ * differently when reuseport group contains connected
+ * sockets. Check that adding a connected UDP socket to the
+ * reuseport group does not affect how reuseport works with
+ * BPF socket lookup.
+ */
+ if (t->reuseport_has_conns) {
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+
+ /* Add an extra socket to reuseport group */
+ reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
+ t->listen_at.port,
+ t->reuseport_prog);
+ if (reuse_conn_fd < 0)
+ goto close;
+
+ /* Connect the extra socket to itself */
+ err = getsockname(reuse_conn_fd, (void *)&addr, &len);
+ if (CHECK(err, "getsockname", "errno %d\n", errno))
+ goto close;
+ err = connect(reuse_conn_fd, (void *)&addr, len);
+ if (CHECK(err, "connect", "errno %d\n", errno))
+ goto close;
+ }
+
client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
if (client_fd < 0)
goto close;
@@ -594,6 +617,8 @@ static void run_lookup_prog(const struct test *t)
close(client_fd);
close:
+ if (reuse_conn_fd != -1)
+ close(reuse_conn_fd);
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
if (server_fds[i] != -1)
close(server_fds[i]);
@@ -711,6 +736,17 @@ static void test_redirect_lookup(struct test_sk_lookup *skel)
.accept_on = SERVER_B,
},
{
+ .desc = "UDP IPv4 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
.desc = "UDP IPv4 redir skip reuseport",
.lookup_prog = skel->progs.select_sock_a_no_reuseport,
.reuseport_prog = skel->progs.select_sock_b,
@@ -755,6 +791,17 @@ static void test_redirect_lookup(struct test_sk_lookup *skel)
.accept_on = SERVER_B,
},
{
+ .desc = "UDP IPv6 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
.desc = "UDP IPv6 redir skip reuseport",
.lookup_prog = skel->progs.select_sock_a_no_reuseport,
.reuseport_prog = skel->progs.select_sock_b,
@@ -1238,8 +1285,8 @@ static void run_tests(struct test_sk_lookup *skel)
static int switch_netns(void)
{
static const char * const setup_script[] = {
- "ip -6 addr add dev lo " EXT_IP6 "/128 nodad",
- "ip -6 addr add dev lo " INT_IP6 "/128 nodad",
+ "ip -6 addr add dev lo " EXT_IP6 "/128",
+ "ip -6 addr add dev lo " INT_IP6 "/128",
"ip link set dev lo up",
NULL,
};
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 7021b92af313..25de86af2d03 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,6 +11,7 @@ void test_skb_ctx(void)
.cb[3] = 4,
.cb[4] = 5,
.priority = 6,
+ .ifindex = 1,
.tstamp = 7,
.wire_len = 100,
.gso_segs = 8,
@@ -92,6 +93,10 @@ void test_skb_ctx(void)
"ctx_out_priority",
"skb->priority == %d, expected %d\n",
skb.priority, 7);
+ CHECK_ATTR(skb.ifindex != 1,
+ "ctx_out_ifindex",
+ "skb->ifindex == %d, expected %d\n",
+ skb.ifindex, 1);
CHECK_ATTR(skb.tstamp != 8,
"ctx_out_tstamp",
"skb->tstamp == %lld, expected %d\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
new file mode 100644
index 000000000000..6f814999b395
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+#include "test_xdp_link.skel.h"
+
+#define IFINDEX_LO 1
+
+/* Check that a directly attached XDP program and an XDP BPF link exclude
+ * each other on IFINDEX_LO, then exercise program update, info query and
+ * detach on a live link.
+ */
+void test_xdp_link(void)
+{
+	__u32 duration = 0, id1, id2, id0 = 0;
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
+	struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+	struct bpf_link_info link_info;
+	struct bpf_prog_info prog_info;
+	struct bpf_link *link;
+	__u32 link_info_len = sizeof(link_info);
+	__u32 prog_info_len = sizeof(prog_info);
+	/* fds and return codes are signed and are printed with %d below */
+	int prog_fd1, prog_fd2, err;
+
+	skel1 = test_xdp_link__open_and_load();
+	if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+		goto cleanup;
+	prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
+
+	skel2 = test_xdp_link__open_and_load();
+	if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+		goto cleanup;
+	prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+	if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+		goto cleanup;
+	id1 = prog_info.id;
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+	if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+		goto cleanup;
+	id2 = prog_info.id;
+
+	/* set initial prog attachment */
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+		goto cleanup;
+
+	/* validate prog ID */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	CHECK(err || id0 != id1, "id1_check",
+	      "loaded prog id %u != id1 %u, err %d\n", id0, id1, err);
+
+	/* BPF link is not allowed to replace prog attachment */
+	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+		bpf_link__destroy(link);
+		/* best-effort detach prog */
+		opts.old_fd = prog_fd1;
+		bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+		goto cleanup;
+	}
+
+	/* detach BPF program */
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(err, "prog_detach", "failed %d\n", err))
+		goto cleanup;
+
+	/* now BPF link should attach successfully */
+	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	skel1->links.xdp_handler = link;
+
+	/* validate prog ID */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id1, "id1_check",
+		  "loaded prog id %u != id1 %u, err %d\n", id0, id1, err))
+		goto cleanup;
+
+	/* BPF prog attach is not allowed to replace BPF link */
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* Can't force-update when BPF link is active */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+	if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* Can't force-detach when BPF link is active */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* BPF link is not allowed to replace another BPF link */
+	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+		bpf_link__destroy(link);
+		goto cleanup;
+	}
+
+	bpf_link__destroy(skel1->links.xdp_handler);
+	skel1->links.xdp_handler = NULL;
+
+	/* new link attach should succeed */
+	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	skel2->links.xdp_handler = link;
+
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id2, "id2_check",
+		  "loaded prog id %u != id2 %u, err %d\n", id0, id2, err))
+		goto cleanup;
+
+	/* updating program under active BPF link works as expected */
+	err = bpf_link__update_program(link, skel1->progs.xdp_handler);
+	if (CHECK(err, "link_upd", "failed: %d\n", err))
+		goto cleanup;
+
+	memset(&link_info, 0, sizeof(link_info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	if (CHECK(err, "link_info", "failed: %d\n", err))
+		goto cleanup;
+
+	CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
+	      "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
+	CHECK(link_info.prog_id != id1, "link_prog_id",
+	      "got %u != exp %u\n", link_info.prog_id, id1);
+	CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
+	      "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+
+	err = bpf_link__detach(link);
+	if (CHECK(err, "link_detach", "failed %d\n", err))
+		goto cleanup;
+
+	/* the detached link is queried again and must still report its prog */
+	memset(&link_info, 0, sizeof(link_info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	if (CHECK(err, "link_info", "failed: %d\n", err))
+		goto cleanup;
+	CHECK(link_info.prog_id != id1, "link_prog_id",
+	      "got %u != exp %u\n", link_info.prog_id, id1);
+	/* ifindex should be zeroed out */
+	CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
+	      "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+
+cleanup:
+	test_xdp_link__destroy(skel1);
+	test_xdp_link__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 17db3bac518b..c196280df90d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -11,6 +11,8 @@
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
#define udp6_sock udp6_sock___not_used
+#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
+#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
@@ -22,6 +24,8 @@
#undef tcp6_sock
#undef bpf_iter__udp
#undef udp6_sock
+#undef bpf_iter__bpf_map_elem
+#undef bpf_iter__bpf_sk_storage_map
struct bpf_iter_meta {
struct seq_file *seq;
@@ -78,3 +82,17 @@ struct udp6_sock {
struct udp_sock udp;
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map_elem { /* context handed to "iter/bpf_map_elem" programs */
+ struct bpf_iter_meta *meta; /* iteration metadata; programs read meta->seq and meta->seq_num */
+ struct bpf_map *map; /* map being walked; one program prints map->id */
+ void *key; /* current key; programs NULL-check it before dereferencing */
+ void *value; /* current value; programs NULL-check it before dereferencing */
+};
+
+struct bpf_iter__bpf_sk_storage_map { /* context handed to "iter/bpf_sk_storage_map" programs */
+ struct bpf_iter_meta *meta; /* iteration metadata */
+ struct bpf_map *map; /* NOTE(review): presumably the sk_storage map being walked -- confirm */
+ struct sock *sk; /* socket of the current entry; programs NULL-check it before use */
+ void *value; /* value of the current entry; programs NULL-check it before use */
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
new file mode 100644
index 000000000000..6286023fd62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u64);
+} arraymap1 SEC(".maps");
+
+/* running totals over every element visited */
+__u32 key_sum = 0;
+__u64 val_sum = 0;
+
+/* Map-element iterator over an array map: emit each raw key/value pair to
+ * the seq file, accumulate both into the globals above, then overwrite the
+ * value with its own index.
+ */
+SEC("iter/bpf_map_elem")
+int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	__u32 *key = ctx->key;
+	__u64 *val = ctx->value;
+
+	/* nothing to dump when the context carries no element */
+	if (key == (void *)0 || val == (void *)0)
+		return 0;
+
+	bpf_seq_write(ctx->meta->seq, key, sizeof(__u32));
+	bpf_seq_write(ctx->meta->seq, val, sizeof(__u64));
+	key_sum += *key;
+	val_sum += *val;
+	/* store the index back into the element it came from */
+	*val = *key;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
new file mode 100644
index 000000000000..07ddbfdbcab7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u64);
+} hashmap1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, __u64);
+	__type(value, __u64);
+} hashmap2 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u32);
+} hashmap3 SEC(".maps");
+
+/* will set before prog run */
+bool in_test_mode = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u64 val_sum = 0;
+
+/* Map-element iterator for a hash map with struct key_t keys and __u64
+ * values. In test mode it only accumulates key fields and values into the
+ * globals above; otherwise it prints one line per element to the seq file,
+ * framed by start and end markers.
+ */
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	/* not narrowed: a truncated counter could read as 0 again */
+	__u64 seq_num = ctx->meta->seq_num;
+	struct bpf_map *map = ctx->map;
+	struct key_t *key = ctx->key;
+	__u64 *val = ctx->value;
+
+	if (in_test_mode) {
+		/* test mode is used by selftests to
+		 * test functionality of bpf_hash_map iter.
+		 *
+		 * the above hashmap1 will have correct size
+		 * and will be accepted, hashmap2 and hashmap3
+		 * should be rejected due to smaller key/value
+		 * size.
+		 */
+		if (key == (void *)0 || val == (void *)0)
+			return 0;
+
+		key_sum_a += key->a;
+		key_sum_b += key->b;
+		key_sum_c += key->c;
+		val_sum += *val;
+		return 0;
+	}
+
+	/* non-test mode, the map is prepared with the
+	 * below bpftool command sequence:
+	 * bpftool map create /sys/fs/bpf/m1 type hash \
+	 * key 12 value 8 entries 3 name map1
+	 * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \
+	 * value 0 0 0 1 0 0 0 1
+	 * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \
+	 * value 0 0 0 1 0 0 0 2
+	 * The bpftool iter command line:
+	 * bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \
+	 * map id 77
+	 * The below output will be:
+	 * map dump starts
+	 * 77: (1000000 0 2000000) (200000001000000)
+	 * 77: (1000000 0 1000000) (100000001000000)
+	 * map dump ends
+	 */
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "map dump starts\n");
+
+	if (key == (void *)0 || val == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "map dump ends\n");
+		return 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id,
+		       key->a, key->b, key->c, *val);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
new file mode 100644
index 000000000000..85fa710fad90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Byte distance between the per-CPU copies of one value, as walked below.
+ * NOTE(review): presumably the kernel pads every per-CPU slot to 8 bytes
+ * even though the value is a __u32 -- confirm.
+ */
+#define PERCPU_VALUE_STRIDE 8
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u32);
+} arraymap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+/* will collect results during prog run */
+__u32 key_sum = 0, val_sum = 0;
+
+/* Map-element iterator over a per-CPU array: add the index to key_sum and
+ * the first num_cpus per-CPU copies of the value to val_sum.
+ */
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	__u32 *key = ctx->key;
+	void *pptr = ctx->value;
+	int i;
+
+	if (key == (void *)0 || pptr == (void *)0)
+		return 0;
+
+	key_sum += *key;
+
+	for (i = 0; i < num_cpus; i++) {
+		val_sum += *(__u32 *)pptr;
+		pptr += PERCPU_VALUE_STRIDE;
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
new file mode 100644
index 000000000000..feaaa2b89c57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Byte distance between the per-CPU copies of one value, as walked below.
+ * NOTE(review): presumably the kernel pads every per-CPU slot to 8 bytes
+ * even though the value is a __u32 -- confirm.
+ */
+#define PERCPU_VALUE_STRIDE 8
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u32);
+} hashmap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u32 val_sum = 0;
+
+/* Map-element iterator over a per-CPU hash: sum the three key fields and
+ * the first num_cpus per-CPU copies of the value into the globals above.
+ */
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	struct key_t *key = ctx->key;
+	void *pptr = ctx->value;
+	int i;
+
+	if (key == (void *)0 || pptr == (void *)0)
+		return 0;
+
+	key_sum_a += key->a;
+	key_sum_b += key->b;
+	key_sum_c += key->c;
+
+	for (i = 0; i < num_cpus; i++) {
+		val_sum += *(__u32 *)pptr;
+		pptr += PERCPU_VALUE_STRIDE;
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
new file mode 100644
index 000000000000..6b70ccaba301
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+/* totals gathered while the iterator walks the map */
+__u32 val_sum = 0;
+__u32 ipv6_sk_count = 0;
+
+/* sk_storage map iterator: for every entry that has both a socket and a
+ * value, add the value to val_sum and count the socket if it is AF_INET6.
+ */
+SEC("iter/bpf_sk_storage_map")
+int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+	__u32 *stored = ctx->value;
+	struct sock *owner = ctx->sk;
+
+	if (!owner || !stored)
+		return 0;
+
+	val_sum += *stored;
+	if (owner->sk_family == AF_INET6)
+		ipv6_sk_count++;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
new file mode 100644
index 000000000000..e3a7575e81d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u64);
+} hashmap1 SEC(".maps");
+
+__u32 key_sum = 0;
+
+/* Negative test: reads four bytes starting right after a struct key_t
+ * sized key, which is out of bounds for hashmap1. NOTE(review): presumably
+ * expected to be refused when the iterator is bound to that map -- confirm.
+ */
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	void *key = ctx->key;
+	__u32 *past_key;
+
+	if (!key)
+		return 0;
+
+	/* out of bound access w.r.t. hashmap1 */
+	past_key = key + sizeof(struct key_t);
+	key_sum += *past_key;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
new file mode 100644
index 000000000000..1c7304f56b1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+/* Negative test: reads the four bytes located just before ctx->value, an
+ * access the in-body comment expects the verifier to refuse.
+ */
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	void *value = ctx->value;
+	__u32 *before_value;
+
+	if (!value)
+		return 0;
+
+	/* negative offset, verifier failure. */
+	before_value = value - 4;
+	value_sum += *before_value;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
new file mode 100644
index 000000000000..a0778fe7857a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_CG_STORAGE_MULTI_H
+#define __PROGS_CG_STORAGE_MULTI_H
+
+#include <asm/types.h>
+
+struct cgroup_value { /* value type of the cgroup_storage maps in the cg_storage_multi_* programs */
+ __u32 egress_pkts; /* bumped atomically by the egress programs on every run */
+ __u32 ingress_pkts; /* bumped atomically by the ingress programs on every run */
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
new file mode 100644
index 000000000000..44ad46b33539
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+/* number of times the program below has run */
+__u32 invocations = 0;
+
+/* Egress hook: atomically bump egress_pkts in this program's cgroup
+ * storage and the global invocation counter, then return 1.
+ */
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
new file mode 100644
index 000000000000..a25373002055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+/* number of times any of the three programs below has run */
+__u32 invocations = 0;
+
+/* First egress hook: count one egress packet and one invocation. */
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+/* Second egress hook: same accounting as egress1. */
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+/* Ingress hook: count one ingress packet and one invocation. */
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->ingress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
new file mode 100644
index 000000000000..a149f33bc533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, __u64);
+	__type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+/* number of times any of the three programs below has run */
+__u32 invocations = 0;
+
+/* First egress hook: count one egress packet and one invocation. */
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+/* Second egress hook: same accounting as egress1. */
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+/* Ingress hook: count one ingress packet and one invocation. */
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+	struct cgroup_value *counters;
+
+	counters = bpf_get_local_storage(&cgroup_storage, 0);
+	__sync_fetch_and_add(&counters->ingress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
new file mode 100644
index 000000000000..25467d13c356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16384);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stackdata_map SEC(".maps");
+
+/* result flags: start at 1 and are set to 2 once the matching helper succeeds */
+long stackid_kernel = 1;
+long stackid_user = 1;
+long stack_kernel = 1;
+long stack_user = 1;
+
+/* perf_event program: call bpf_get_stackid() and bpf_get_stack() for both
+ * the kernel and the user stack and record each success in the flags
+ * above.
+ */
+SEC("perf_event")
+int oncpu(void *ctx)
+{
+	stack_trace_t *trace;
+	__u32 key = 0;
+	long val;
+
+	/* a stack id of 0 is a valid result; only negative values are errors */
+	val = bpf_get_stackid(ctx, &stackmap, 0);
+	if (val >= 0)
+		stackid_kernel = 2;
+	val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+	if (val >= 0)
+		stackid_user = 2;
+
+	/* the single per-CPU array slot serves as the bpf_get_stack() buffer */
+	trace = bpf_map_lookup_elem(&stackdata_map, &key);
+	if (!trace)
+		return 0;
+
+	val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0);
+	if (val > 0)
+		stack_kernel = 2;
+
+	val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK);
+	if (val > 0)
+		stack_user = 2;
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c
index 75c60c3c29cf..20861ec2f674 100644
--- a/tools/testing/selftests/bpf/progs/test_core_retro.c
+++ b/tools/testing/selftests/bpf/progs/test_core_retro.c
@@ -13,6 +13,13 @@ struct {
__uint(max_entries, 1);
__type(key, int);
__type(value, int);
+} exp_tgid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
} results SEC(".maps");
SEC("tp/raw_syscalls/sys_enter")
@@ -21,6 +28,12 @@ int handle_sys_enter(void *ctx)
struct task_struct *task = (void *)bpf_get_current_task();
int tgid = BPF_CORE_READ(task, tgid);
int zero = 0;
+ int real_tgid = bpf_get_current_pid_tgid() >> 32;
+ int *exp_tgid = bpf_map_lookup_elem(&exp_tgid_map, &zero);
+
+ /* only pass through sys_enters from test process */
+ if (!exp_tgid || *exp_tgid != real_tgid)
+ return 0;
bpf_map_update_elem(&results, &zero, &tgid, 0);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
new file mode 100644
index 000000000000..eb93ea95d1d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("xdp/handler")
+int xdp_handler(struct xdp_md *xdp) /* XDP program that ignores the packet and always returns 0 */
+{
+ return 0; /* NOTE(review): 0 is presumably XDP_ABORTED in enum xdp_action -- confirm this verdict is intended */
+}
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
index 8429b22525a7..165e3c2dd9a3 100644
--- a/tools/testing/selftests/bpf/progs/udp_limit.c
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -6,14 +6,28 @@
int invocations = 0, in_use = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_map SEC(".maps");
+
SEC("cgroup/sock_create")
int sock(struct bpf_sock *ctx)
{
+ int *sk_storage;
__u32 key;
if (ctx->type != SOCK_DGRAM)
return 1;
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_storage)
+ return 0;
+ *sk_storage = 0xdeadbeef;
+
__sync_fetch_and_add(&invocations, 1);
if (in_use > 0) {
@@ -31,11 +45,16 @@ int sock(struct bpf_sock *ctx)
SEC("cgroup/sock_release")
int sock_release(struct bpf_sock *ctx)
{
+ int *sk_storage;
__u32 key;
if (ctx->type != SOCK_DGRAM)
return 1;
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0);
+ if (!sk_storage || *sk_storage != 0xdeadbeef)
+ return 0;
+
__sync_fetch_and_add(&invocations, 1);
__sync_fetch_and_add(&in_use, -1);
return 1;
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index a53ed58528d6..bfff82be3fc1 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -34,7 +34,7 @@ serverPort = int(sys.argv[1])
# create active socket
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
try:
- sock.connect(('localhost', serverPort))
+ sock.connect(('::1', serverPort))
except socket.error as e:
sys.exit(1)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index 0ca60d193bed..42ab8882f00f 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -38,7 +38,7 @@ serverSocket = None
# create passive socket
serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try: serverSocket.bind(('localhost', 0))
+try: serverSocket.bind(('::1', 0))
except socket.error as msg:
print('bind fails: ' + str(msg))
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 655729004391..d946252a25bb 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -74,22 +74,11 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to setup cgroup environment\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
+	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+	if (cgroup_fd < 0) {
+		printf("Failed to create test cgroup\n");
+		goto err;
+	}
/* Attach the bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index d850fb9076b5..804dddd97d4c 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -33,21 +33,10 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load DEV_CGROUP program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (cgroup_fd < 0) {
printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
+ goto out;
}
/* Attach bpf program */
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index c1da5404454a..a7b9a69f4fd5 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -58,22 +58,9 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load bpf program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (cgroup_fd < 0)
goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
/* Attach bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
@@ -82,9 +69,9 @@ int main(int argc, char **argv)
}
if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+ assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
else
- assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
+ assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
&prog_cnt)) {
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 356351c0ac28..4a64306728ab 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -160,16 +160,10 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CGROUP_PATH);
+ cgfd = cgroup_setup_and_join(CGROUP_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CGROUP_PATH))
- goto err;
-
if (send_packet(argv[1]))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 52bf14955797..9613f7538840 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -464,16 +464,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 0358814c67dc..b8c72c1d9cf7 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -1638,16 +1638,10 @@ int main(int argc, char **argv)
exit(err);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index f0fc103261a4..6c9f269c396d 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -421,19 +421,11 @@ int main(int argc, char **argv)
struct bpf_object *obj;
struct bpf_map *map;
- err = setup_cgroup_environment();
- CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
- err, errno);
-
- atexit(cleanup_cgroup_environment);
-
/* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
"cgroup_fd:%d errno:%d", cgroup_fd, errno);
-
- err = join_cgroup(TEST_CGROUP);
- CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
+ atexit(cleanup_cgroup_environment);
err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 15653b0e26eb..154a8fd2a48d 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -191,16 +191,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_test(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 78789b27e573..9b6fb00dc7a0 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1963,23 +1963,9 @@ int main(int argc, char **argv)
}
if (!cg_fd) {
- if (setup_cgroup_environment()) {
- fprintf(stderr, "ERROR: cgroup env failed\n");
- return -EINVAL;
- }
-
- cg_fd = create_and_get_cgroup(CG_PATH);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, strerror(errno));
+ cg_fd = cgroup_setup_and_join(CG_PATH);
+ if (cg_fd < 0)
return cg_fd;
- }
-
- if (join_cgroup(CG_PATH)) {
- fprintf(stderr, "ERROR: failed to join cgroup\n");
- return -EINVAL;
- }
cg_created = 1;
}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index d196e2a4a6e0..a20a919244c0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1619,16 +1619,10 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
-
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 3ae127620463..74a9e49988b6 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -102,16 +102,10 @@ int main(int argc, char **argv)
__u32 key = 0;
int rv;
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index f9765ddf0761..8549b31716ab 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -86,16 +86,10 @@ int main(int argc, char **argv)
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c4b17e08d431..dd80f0c84afb 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -10,52 +10,72 @@
# | xdp forwarding |
# ------------------
-cleanup()
+ret=0
+
+setup()
{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_redirect [PASS]";
- else
- echo "selftests: test_xdp_redirect [FAILED]";
- fi
- set +e
+ local xdpmode=$1
+
+ ip netns add ns1
+ ip netns add ns2
+
+ ip link add veth1 index 111 type veth peer name veth11 netns ns1
+ ip link add veth2 index 222 type veth peer name veth22 netns ns2
+
+ ip link set veth1 up
+ ip link set veth2 up
+ ip -n ns1 link set dev veth11 up
+ ip -n ns2 link set dev veth22 up
+
+ ip -n ns1 addr add 10.1.1.11/24 dev veth11
+ ip -n ns2 addr add 10.1.1.22/24 dev veth22
+}
+
+cleanup()
+{
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
ip netns del ns1 2> /dev/null
ip netns del ns2 2> /dev/null
}
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 0
-fi
-set -e
-
-ip netns add ns1
-ip netns add ns2
+test_xdp_redirect()
+{
+ local xdpmode=$1
-trap cleanup 0 2 3 6 9
+ setup
-ip link add veth1 index 111 type veth peer name veth11
-ip link add veth2 index 222 type veth peer name veth22
+ ip link set dev veth1 $xdpmode off &> /dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
+ return 0
+ fi
-ip link set veth11 netns ns1
-ip link set veth22 netns ns2
+ ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+ ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+ ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
+ ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
-ip link set veth1 up
-ip link set veth2 up
+ ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null
+ local ret1=$?
+ ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null
+ local ret2=$?
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+ if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then
+ echo "selftests: test_xdp_redirect $xdpmode [PASS]";
+ else
+ ret=1
+ echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
+ fi
-ip netns exec ns1 ip link set dev veth11 up
-ip netns exec ns2 ip link set dev veth22 up
+ cleanup
+}
-ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
-ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+set -e
+trap cleanup 2 3 6 9
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+test_xdp_redirect xdpgeneric
+test_xdp_redirect xdpdrv
-exit 0
+exit $ret
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 0af6337a8962..800d503e5cb4 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -64,3 +64,17 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
+
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
+{
+ __u32 info_len = sizeof(*info);
+ int err;
+
+ memset(info, 0, sizeof(*info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+ if (err) {
+ printf("failed to get link info: %d\n", -errno);
+ return 0;
+ }
+ return info->prog_id;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 923b51762759..d4f8e749611b 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,5 +1,8 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (C) 2020 Facebook, Inc. */
#include <stdbool.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
int parse_num_list(const char *s, bool **set, int *set_len);
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);