From 44a726c3f23cf762ef4ce3c1709aefbcbe97f62c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 1 Oct 2022 13:44:24 +0300 Subject: bpftool: Print newline before '}' for struct with padding only fields btf_dump_emit_struct_def attempts to print empty structures on a single line, e.g. `struct empty {}`. However, it also has to account for the case when the struct has no regular fields but does have padding fields. In such a case `vlen` is zero, but `size` is non-zero. E.g. here is struct bpf_timer from vmlinux.h before this patch: struct bpf_timer { long: 64; long: 64;}; And after this patch: struct bpf_timer { long: 64; long: 64; }; Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221001104425.415768-1-eddyz87@gmail.com --- tools/lib/bpf/btf_dump.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 4221f73a74d0..e4da6de68d8f 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -944,7 +944,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, lvl + 1); } - if (vlen) + /* + * Keep `struct empty {}` on a single line, + * only print newline when there are regular or padding fields. + */ + if (vlen || t->size) btf_dump_printf(d, "\n"); btf_dump_printf(d, "%s}", pfx(lvl)); if (packed) -- cgit v1.2.3-58-ga151 From d503f1176b14f722a40ea5110312614982f9a80b Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 1 Oct 2022 13:44:25 +0300 Subject: selftests/bpf: Test btf dump for struct with padding only fields Structures with zero regular fields but some padding constitute a special case in btf_dump.c:btf_dump_emit_struct_def with regard to the newline before the closing '}'. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221001104425.415768-2-eddyz87@gmail.com --- tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index f2661c8d2d90..7cb522d22a66 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -102,12 +102,21 @@ struct zone { struct zone_padding __pad__; }; +/* ----- START-EXPECTED-OUTPUT ----- */ +struct padding_wo_named_members { + long: 64; + long: 64; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + int f(struct { struct padded_implicitly _1; struct padded_explicitly _2; struct padded_a_lot _3; struct padded_cache_line _4; struct zone _5; + struct padding_wo_named_members _6; } *_) { return 0; -- cgit v1.2.3-58-ga151 From 2a72f5951ac6d613216a93ae3e172cabb04aaefc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 5 Oct 2022 09:14:48 -0700 Subject: selftests/bpf: allow requesting log level 2 in test_verifier Log level 1 on successfully verified programs is basically equivalent to log level 4 (stats-only), so it's useful to be able to request more verbose logs at log level 2. Teach test_verifier to recognize -vv as a "very verbose" mode switch and use log level 2 in such mode. Also force verifier stats regardless of -v or -vv; they are very minimal and useful enough to always be emitted in verbose mode.
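For illustration, a hypothetical invocation (assuming test_verifier's usual "select tests by number" arguments; the test number here is just an example):

$ ./test_verifier -v 1   # log level 1, verifier stats forced on
$ ./test_verifier -vv 1  # log level 2 ("very verbose"), verifier stats forced on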
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221005161450.1064469-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2dbcbf363c18..9c7091100b7f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -68,7 +68,6 @@ #define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) #define DEFAULT_LIBBPF_LOG_LEVEL 4 -#define VERBOSE_LIBBPF_LOG_LEVEL 1 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -81,6 +80,7 @@ static bool unpriv_disabled = false; static int skips; static bool verbose = false; +static int verif_log_level = 0; struct kfunc_btf_id_pair { const char *kfunc; @@ -759,7 +759,7 @@ static int load_btf_spec(__u32 *types, int types_len, .log_buf = bpf_vlog, .log_size = sizeof(bpf_vlog), .log_level = (verbose - ? VERBOSE_LIBBPF_LOG_LEVEL + ? verif_log_level : DEFAULT_LIBBPF_LOG_LEVEL), ); @@ -1491,7 +1491,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; + opts.log_level = verif_log_level | 4; /* force stats */ else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else @@ -1746,6 +1746,13 @@ int main(int argc, char **argv) if (argc > 1 && strcmp(argv[1], "-v") == 0) { arg++; verbose = true; + verif_log_level = 1; argc--; } + if (argc > 1 && strcmp(argv[1], "-vv") == 0) { + arg++; + verbose = true; + verif_log_level = 2; argc--; + } -- cgit v1.2.3-58-ga151 From 6df2eb45e378f38ca42776276e7bb5b5078c12cf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 5 Oct 2022 09:14:49 -0700 Subject: selftests/bpf: avoid reporting +100% difference in veristat for actual 0% In the special case when both base and comparison values are 0, veristat currently reports a "+0 (+100%)" difference, which is quite confusing. Fix it up to be "+0 (+0%)". Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221005161450.1064469-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b0d83a28e348..38f678122a7d 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1104,17 +1104,21 @@ static void output_comp_stats(const struct verif_stats *base, const struct verif else snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH"); } else { + double p = 0.0; + snprintf(base_buf, sizeof(base_buf), "%ld", base_val); snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); diff_val = comp_val - base_val; if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) { - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", - diff_val, comp_val < base_val ? -100.0 : 100.0); + if (comp_val == base_val) + p = 0.0; /* avoid +0 (+100%) case */ + else + p = comp_val < base_val ? 
-100.0 : 100.0; } else { - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", - diff_val, diff_val * 100.0 / base_val); + p = diff_val * 100.0 / base_val; } + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); } switch (fmt) { -- cgit v1.2.3-58-ga151 From 60df8c4d32d47fa5e91d02811dadcb97a0bd097e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 5 Oct 2022 09:14:50 -0700 Subject: selftests/bpf: add BPF object fixup step to veristat Add a step that attempts to "fix up" a BPF object file so that it can be loaded successfully. E.g., set a non-zero size for BPF maps that expect max_entries to be set, when the BPF object file itself doesn't specify declarative max_entries values. Another issue was with automatic map pinning. Pinning has no effect on the BPF verification process itself but can interfere when validating multiple related programs and object files, so veristat now disables all pinning explicitly. In the future, more such fix-up heuristics could be added to accommodate common patterns encountered in practice. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221005161450.1064469-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 38f678122a7d..973cbf6af323 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -509,6 +509,28 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +static void fixup_obj(struct bpf_object *obj) +{ + struct bpf_map *map; + + bpf_object__for_each_map(map, obj) { + /* disable pinning */ + bpf_map__set_pin_path(map, NULL); + + /* fix up map size, if necessary */ + switch (bpf_map__type(map)) { + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_CGROUP_STORAGE: + break; + default: + if (bpf_map__max_entries(map) == 0) + bpf_map__set_max_entries(map, 1); + } + } +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *prog_name = bpf_program__name(prog); @@ -543,6 +565,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf } verif_log_buf[0] = '\0'; + /* increase chances of successful BPF object loading */ + fixup_obj(obj); + err = bpf_object__load(obj); env.progs_processed++; -- cgit v1.2.3-58-ga151 From 8a76145a2ec2a81dfe34d7ac42e8c242f095e8c8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 5 Oct 2022 21:24:51 -0700 Subject: bpf: explicitly define BPF_FUNC_xxx integer values Historically, enum bpf_func_id's BPF_FUNC_xxx enumerators relied on implicit sequential values being assigned by the compiler. This is convenient, as new BPF helpers are always added at the very end, but it also has its downsides, some of them being: - with over 200 helpers now, it's very hard to know what each helper's ID is, which is often important to know when working with BPF assembly (e.g., when dumping raw BPF assembly instructions with the llvm-objdump -d command). It's possible to work around this by looking into vmlinux.h, dumping /sys/kernel/btf/vmlinux, looking at the libbpf-provided bpf_helper_defs.h, etc. 
But it always feels like an unnecessary step, and one should be able to quickly figure this out from the UAPI header. - when backporting and cherry-picking only some BPF helpers onto older kernels, it's important to be able to skip some enum values for helpers that weren't backported, but preserve absolute integer IDs to keep BPF helper IDs stable, so that BPF programs stay portable across upstream and backported kernels. While neither problem is insurmountable, they come up frequently enough and are annoying enough to warrant improving the situation. And for backporting, the problem can easily go unnoticed for a while, especially if the backport is done by people not very familiar with the BPF subsystem overall. Anyway, it's easy to fix this by making sure that the __BPF_FUNC_MAPPER macro provides explicit helper IDs. Unfortunately, that would potentially break existing users that use the UAPI-exposed __BPF_FUNC_MAPPER and are expected to pass a macro that accepts only a symbolic helper identifier (e.g., map_lookup_elem for the bpf_map_lookup_elem() helper). As such, we need to introduce a new macro (___BPF_FUNC_MAPPER) which specifies both the identifier and the integer ID, but in such a way as to allow the existing __BPF_FUNC_MAPPER to be expressed in terms of the new ___BPF_FUNC_MAPPER macro. And that's what this patch is doing. To avoid duplication and allow __BPF_FUNC_MAPPER to stay *exactly* the same, ___BPF_FUNC_MAPPER accepts arbitrary "context" arguments, which can be used to pass any extra macros, arguments, and whatnot. In our case we use this to pass the original user-provided macro that expects a single argument, and __BPF_FUNC_MAPPER uses its own three-argument __BPF_FUNC_MAPPER_APPLY intermediate macro to impedance-match the new and old "callback" macros. Once we resolve this, we use the new ___BPF_FUNC_MAPPER to define enum bpf_func_id with explicit values. The other users of __BPF_FUNC_MAPPER in the kernel (namely in kernel/bpf/disasm.c) are kept exactly the same, both as a demonstration that backwards compat works and to avoid unnecessary code churn. Note that the new ___BPF_FUNC_MAPPER() doesn't forcefully insert a comma between values, as that might not be appropriate in all possible cases where ___BPF_FUNC_MAPPER might be used. This doesn't reduce usability, as it's trivial to insert that comma inside the "callback" macro. To validate that all the manually specified IDs are exactly right, we used BTF to compare before and after values: $ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > after.txt $ git stash # stash UAPI changes $ make -j90 ... re-building kernel without UAPI changes ... $ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > before.txt $ diff -u before.txt after.txt --- before.txt 2022-10-05 10:48:18.119195916 -0700 +++ after.txt 2022-10-05 10:46:49.446615025 -0700 @@ -1,4 +1,4 @@ -[14576] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211 +[9560] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211 'BPF_FUNC_unspec' val=0 'BPF_FUNC_map_lookup_elem' val=1 'BPF_FUNC_map_update_elem' val=2 As can be seen from the diff above, the only thing that changed was the resulting BTF type ID of ENUM bpf_func_id, not any of the enumerators, their names, or integer values. The only other place that needed fixing was scripts/bpf_doc.py, which is used to generate man pages and the bpf_helper_defs.h header for libbpf and selftests. That script is tightly coupled to the exact shape of the ___BPF_FUNC_MAPPER macro definition, so it had to be trivially adapted.
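To make the compatibility scheme concrete, here is a minimal, hypothetical consumer of the new macro (not part of this patch); it assumes GNU cpp, whose ", ##ctx" extension drops the trailing comma when no extra context arguments are passed, exactly as the header itself relies on:

#include <stdio.h>
#include <linux/bpf.h>

/* new-style callback: receives (name, id); with no extra ctx arguments,
 * each FN(name, id, ##ctx) entry collapses to FN(name, id)
 */
#define PRINT_HELPER(name, id) printf("BPF_FUNC_" #name " = %d\n", id);

int main(void)
{
	___BPF_FUNC_MAPPER(PRINT_HELPER)   /* one printf statement per helper */
	return 0;
}

Old-style single-argument callbacks keep working unchanged through __BPF_FUNC_MAPPER(FN), which routes each entry through __BPF_FUNC_MAPPER_APPLY and re-inserts the trailing comma.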
Cc: Quentin Monnet Reported-by: Andrea Terzolo Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Acked-by: Jiri Olsa Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221006042452.2089843-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 432 +++++++++++++++++++++-------------------- scripts/bpf_doc.py | 19 +- tools/include/uapi/linux/bpf.h | 432 +++++++++++++++++++++-------------------- 3 files changed, 447 insertions(+), 436 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 51b9aa640ad2..17f61338f8f8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5436,225 +5436,231 @@ union bpf_attr { * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - 
FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ - FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(per_cpu_ptr), \ - FN(this_cpu_ptr), \ - FN(redirect_peer), \ - FN(task_storage_get), \ - FN(task_storage_delete), \ - FN(get_current_task_btf), \ - FN(bprm_opts_set), \ - FN(ktime_get_coarse_ns), \ - FN(ima_inode_hash), \ - FN(sock_from_file), \ - FN(check_mtu), \ - FN(for_each_map_elem), \ - FN(snprintf), \ - FN(sys_bpf), \ - FN(btf_find_by_name_kind), \ - FN(sys_close), \ - FN(timer_init), \ - FN(timer_set_callback), \ - FN(timer_start), \ - FN(timer_cancel), \ - FN(get_func_ip), \ - FN(get_attach_cookie), \ - FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ - FN(get_retval), \ - FN(set_retval), \ - FN(xdp_get_buff_len), \ - FN(xdp_load_bytes), \ - FN(xdp_store_bytes), \ - FN(copy_from_user_task), \ - FN(skb_set_tstamp), \ - FN(ima_file_hash), \ - FN(kptr_xchg), \ - FN(map_lookup_percpu_elem), \ - FN(skc_to_mptcp_sock), \ - FN(dynptr_from_mem), \ - FN(ringbuf_reserve_dynptr), \ - FN(ringbuf_submit_dynptr), \ - FN(ringbuf_discard_dynptr), \ - FN(dynptr_read), \ - FN(dynptr_write), \ - FN(dynptr_data), \ - FN(tcp_raw_gen_syncookie_ipv4), \ - FN(tcp_raw_gen_syncookie_ipv6), \ - FN(tcp_raw_check_syncookie_ipv4), \ - FN(tcp_raw_check_syncookie_ipv6), \ - FN(ktime_get_tai_ns), \ - FN(user_ringbuf_drain), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, 
##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, 
##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5c389df6045..2fe07c9e3fe0 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -253,28 +253,27 @@ class HeaderParser(object): break def parse_define_helpers(self): - # Parse FN(...) in #define __BPF_FUNC_MAPPER to compare later with the + # Parse FN(...) in #define ___BPF_FUNC_MAPPER to compare later with the # number of unique function names present in description and use the # correct enumeration value. # Note: seek_to(..) discards the first line below the target search text, - # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers. - self.seek_to('#define __BPF_FUNC_MAPPER(FN)', + # resulting in FN(unspec, 0, ##ctx) being skipped and not added to + # self.define_unique_helpers. + self.seek_to('#define ___BPF_FUNC_MAPPER(FN, ctx...)', 'Could not find start of eBPF helper definition list') # Searches for one FN(\w+) define or a backslash for newline - p = re.compile('\s*FN\((\w+)\)|\\\\') + p = re.compile('\s*FN\((\w+), (\d+), ##ctx\)|\\\\') fn_defines_str = '' - i = 1 # 'unspec' is skipped as mentioned above while True: capture = p.match(self.line) if capture: fn_defines_str += self.line - self.helper_enum_vals[capture.expand(r'bpf_\1')] = i - i += 1 + self.helper_enum_vals[capture.expand(r'bpf_\1')] = int(capture[2]) else: break self.line = self.reader.readline() # Find the number of occurences of FN(\w+) - self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str) + self.define_unique_helpers = re.findall('FN\(\w+, \d+, ##ctx\)', fn_defines_str) def assign_helper_values(self): seen_helpers = set() @@ -423,7 +422,7 @@ class PrinterHelpersRST(PrinterRST): """ def __init__(self, parser): self.elements = parser.helpers - self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '___BPF_FUNC_MAPPER') def print_header(self): header = '''\ @@ -636,7 +635,7 @@ class PrinterHelpers(Printer): """ def __init__(self, parser): self.elements = parser.helpers - self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '___BPF_FUNC_MAPPER') type_fwds = [ 'struct bpf_fib_lookup', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 51b9aa640ad2..17f61338f8f8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ 
-5436,225 +5436,231 @@ union bpf_attr { * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ 
- FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(per_cpu_ptr), \ - FN(this_cpu_ptr), \ - FN(redirect_peer), \ - FN(task_storage_get), \ - FN(task_storage_delete), \ - FN(get_current_task_btf), \ - FN(bprm_opts_set), \ - FN(ktime_get_coarse_ns), \ - FN(ima_inode_hash), \ - FN(sock_from_file), \ - FN(check_mtu), \ - FN(for_each_map_elem), \ - FN(snprintf), \ - FN(sys_bpf), \ - FN(btf_find_by_name_kind), \ - FN(sys_close), \ - FN(timer_init), \ - FN(timer_set_callback), \ - FN(timer_start), \ - FN(timer_cancel), \ - FN(get_func_ip), \ - FN(get_attach_cookie), \ - FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ - FN(get_retval), \ - FN(set_retval), \ - FN(xdp_get_buff_len), \ - FN(xdp_load_bytes), \ - FN(xdp_store_bytes), \ - FN(copy_from_user_task), \ - FN(skb_set_tstamp), \ - FN(ima_file_hash), \ - FN(kptr_xchg), \ - FN(map_lookup_percpu_elem), \ - FN(skc_to_mptcp_sock), \ - FN(dynptr_from_mem), \ - FN(ringbuf_reserve_dynptr), \ - FN(ringbuf_submit_dynptr), \ - FN(ringbuf_discard_dynptr), \ - FN(dynptr_read), \ - FN(dynptr_write), \ - FN(dynptr_data), \ - FN(tcp_raw_gen_syncookie_ipv4), \ - FN(tcp_raw_gen_syncookie_ipv6), \ - FN(tcp_raw_check_syncookie_ipv4), \ - FN(tcp_raw_check_syncookie_ipv6), \ - FN(ktime_get_tai_ns), \ - FN(user_ringbuf_drain), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, 
##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, 
##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN -- cgit v1.2.3-58-ga151 From 8206e4e95230daeeba43c59fc7c39656883ecd62 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 5 Oct 2022 22:34:29 -0700 Subject: selftests/bpf: Add selftest deny_namespace to s390x deny list BPF CI reported that selftest deny_namespace failed on s390x. test_unpriv_userns_create_no_bpf:PASS:no-bpf unpriv new user ns 0 nsec test_deny_namespace:PASS:skel load 0 nsec libbpf: prog 'test_userns_create': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'test_userns_create': failed to auto-attach: -524 test_deny_namespace:FAIL:attach unexpected error: -524 (errno 524) #57/1 deny_namespace/unpriv_userns_create_no_bpf:FAIL #57 deny_namespace:FAIL BPF program test_userns_create is a BPF LSM type program which is based on a trampoline, and s390x does not support trampolines. Let us add the test to the s390x deny list to avoid this failure in BPF CI. Signed-off-by: Yonghong Song Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20221006053429.3549165-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 17e074eb42b8..0fb03b8047d5 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -75,3 +75,4 @@ user_ringbuf # failed to find kernel BTF type ID of lookup_key # JIT does not support calling kernel function (kfunc) verify_pkcs7_sig # JIT does not support calling kernel function (kfunc) kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) +deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) -- cgit v1.2.3-58-ga151 From 1d2d941bc140b34587b4c889699fb0f89d29937f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 6 Oct 2022 10:31:06 +0200 Subject: selftests/bpf: Add missing bpf_iter_vma_offset__destroy call Adding missing bpf_iter_vma_offset__destroy call and using in-skeleton link pointer so we don't need an extra bpf_link__destroy call. 
Fixes: b3e1331eb925 ("selftests/bpf: Test parameterized task BPF iterators.") Cc: Kui-Feng Lee Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221006083106.117987-1-jolsa@kernel.org Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 3369c5ec3a17..ecde236047fe 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -1498,7 +1498,6 @@ static noinline int trigger_func(int arg) static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc) { struct bpf_iter_vma_offset *skel; - struct bpf_link *link; char buf[16] = {}; int iter_fd, len; int pgsz, shift; @@ -1513,11 +1512,11 @@ static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool ; skel->bss->page_shift = shift; - link = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); - if (!ASSERT_OK_PTR(link, "attach_iter")) - return; + skel->links.get_vma_offset = bpf_program__attach_iter(skel->progs.get_vma_offset, opts); + if (!ASSERT_OK_PTR(skel->links.get_vma_offset, "attach_iter")) + goto exit; - iter_fd = bpf_iter_create(bpf_link__fd(link)); + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.get_vma_offset)); if (!ASSERT_GT(iter_fd, 0, "create_iter")) goto exit; @@ -1535,7 +1534,7 @@ static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool close(iter_fd); exit: - bpf_link__destroy(link); + bpf_iter_vma_offset__destroy(skel); } static void test_task_vma_offset(void) -- cgit v1.2.3-58-ga151 From 7a366da2d2ba86316d3ec408f70d19b63916f9ce Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 6 Oct 2022 13:07:31 +0200 Subject: libbpf: Fix LIBBPF_1.0.0 declaration in libbpf.map Add the missing LIBBPF_0.8.0 at the end of the LIBBPF_1.0.0 declaration, similarly to other version declarations. Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221006110736.84253-2-roberto.sassu@huaweicloud.com --- tools/lib/bpf/libbpf.map | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c1d6aa7c82b6..04fab9f1fdd7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -367,7 +367,7 @@ LIBBPF_1.0.0 { libbpf_bpf_map_type_str; libbpf_bpf_prog_type_str; perf_buffer__buffer; -}; +} LIBBPF_0.8.0; LIBBPF_1.1.0 { global: -- cgit v1.2.3-58-ga151 From 243e300563b1b39ac669c7698742931427699184 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 6 Oct 2022 13:07:32 +0200 Subject: libbpf: Introduce bpf_get_fd_by_id_opts and bpf_map_get_fd_by_id_opts() Define a new data structure called bpf_get_fd_by_id_opts, with the member open_flags, to be used by callers of the _opts variants of bpf_*_get_fd_by_id() to specify the permissions needed for the file descriptor to be obtained. Also, introduce bpf_map_get_fd_by_id_opts(), to let the caller pass a bpf_get_fd_by_id_opts structure. Finally, keep the existing bpf_map_get_fd_by_id(), and call bpf_map_get_fd_by_id_opts() with NULL as opts argument, to request read-write permissions (current behavior). 
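A brief usage sketch (the map_id value and the wrapper function are hypothetical; the calls and the BPF_F_RDONLY flag are the ones described above):

#include <bpf/bpf.h>

/* obtain a read-only fd for the map with the given id, e.g. one
 * previously retrieved via bpf_obj_get_info_by_fd()
 */
int get_map_fd_rdonly(__u32 map_id)
{
	LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts, .open_flags = BPF_F_RDONLY);

	/* passing NULL opts instead would request read-write access,
	 * matching the behavior of plain bpf_map_get_fd_by_id()
	 */
	return bpf_map_get_fd_by_id_opts(map_id, &opts);
}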
Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221006110736.84253-3-roberto.sassu@huaweicloud.com --- tools/lib/bpf/bpf.c | 12 +++++++++++- tools/lib/bpf/bpf.h | 10 ++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1d49a0352836..c08d7509553d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -948,19 +948,29 @@ int bpf_prog_get_fd_by_id(__u32 id) return libbpf_err_errno(fd); } -int bpf_map_get_fd_by_id(__u32 id) +int bpf_map_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.map_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_map_get_fd_by_id(__u32 id) +{ + return bpf_map_get_fd_by_id_opts(id, NULL); +} + int bpf_btf_get_fd_by_id(__u32 id) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 9c50beabdd14..10ce38f0a9ef 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -365,8 +365,18 @@ LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); + +struct bpf_get_fd_by_id_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 open_flags; /* permissions requested for the operation on fd */ + size_t :0; +}; +#define bpf_get_fd_by_id_opts__last_field open_flags + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 04fab9f1fdd7..2e665b21d84f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -371,6 +371,7 @@ LIBBPF_1.0.0 { LIBBPF_1.1.0 { global: + bpf_map_get_fd_by_id_opts; user_ring_buffer__discard; user_ring_buffer__free; user_ring_buffer__new; -- cgit v1.2.3-58-ga151 From 8f13f168ea14333ac971e5404800b7fb3658e782 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 6 Oct 2022 13:07:33 +0200 Subject: libbpf: Introduce bpf_prog_get_fd_by_id_opts() Introduce bpf_prog_get_fd_by_id_opts(), for symmetry with bpf_map_get_fd_by_id_opts(), to let the caller pass the newly introduced data structure bpf_get_fd_by_id_opts. Keep the existing bpf_prog_get_fd_by_id(), and call bpf_prog_get_fd_by_id_opts() with NULL as opts argument, to prevent setting open_flags. Currently, the kernel does not support non-zero open_flags for bpf_prog_get_fd_by_id_opts(), and a call with them will result in an error returned by the bpf() system call. The caller should always pass zero open_flags. 
Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221006110736.84253-4-roberto.sassu@huaweicloud.com --- tools/lib/bpf/bpf.c | 12 +++++++++++- tools/lib/bpf/bpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c08d7509553d..03be8c96bbac 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -935,19 +935,29 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID); } -int bpf_prog_get_fd_by_id(__u32 id) +int bpf_prog_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.prog_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_prog_get_fd_by_id(__u32 id) +{ + return bpf_prog_get_fd_by_id_opts(id, NULL); +} + int bpf_map_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts) { diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 10ce38f0a9ef..4558bafbce14 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -374,6 +374,8 @@ struct bpf_get_fd_by_id_opts { #define bpf_get_fd_by_id_opts__last_field open_flags LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 2e665b21d84f..c3604eaa220d 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -372,6 +372,7 @@ LIBBPF_1.0.0 { LIBBPF_1.1.0 { global: bpf_map_get_fd_by_id_opts; + bpf_prog_get_fd_by_id_opts; user_ring_buffer__discard; user_ring_buffer__free; user_ring_buffer__new; -- cgit v1.2.3-58-ga151 From 2ce7cbf2ba71bb03542af739af80e86b6855ae48 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 6 Oct 2022 13:07:34 +0200 Subject: libbpf: Introduce bpf_btf_get_fd_by_id_opts() Introduce bpf_btf_get_fd_by_id_opts(), for symmetry with bpf_map_get_fd_by_id_opts(), to let the caller pass the newly introduced data structure bpf_get_fd_by_id_opts. Keep the existing bpf_btf_get_fd_by_id(), and call bpf_btf_get_fd_by_id_opts() with NULL as opts argument, to prevent setting open_flags. Currently, the kernel does not support non-zero open_flags for bpf_btf_get_fd_by_id_opts(), and a call with them will result in an error returned by the bpf() system call. The caller should always pass zero open_flags. 
Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221006110736.84253-5-roberto.sassu@huaweicloud.com --- tools/lib/bpf/bpf.c | 12 +++++++++++- tools/lib/bpf/bpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 03be8c96bbac..b95fed0c1644 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -981,19 +981,29 @@ int bpf_map_get_fd_by_id(__u32 id) return bpf_map_get_fd_by_id_opts(id, NULL); } -int bpf_btf_get_fd_by_id(__u32 id) +int bpf_btf_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.btf_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_btf_get_fd_by_id(__u32 id) +{ + return bpf_btf_get_fd_by_id_opts(id, NULL); +} + int bpf_link_get_fd_by_id(__u32 id) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 4558bafbce14..4b487305eeb8 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -380,6 +380,8 @@ LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c3604eaa220d..7011d5eec67b 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -371,6 +371,7 @@ LIBBPF_1.0.0 { LIBBPF_1.1.0 { global: + bpf_btf_get_fd_by_id_opts; bpf_map_get_fd_by_id_opts; bpf_prog_get_fd_by_id_opts; user_ring_buffer__discard; -- cgit v1.2.3-58-ga151 From 97c8f9dd5db839f2387785ee936d0a5b257b31d3 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 6 Oct 2022 13:07:35 +0200 Subject: libbpf: Introduce bpf_link_get_fd_by_id_opts() Introduce bpf_link_get_fd_by_id_opts(), for symmetry with bpf_map_get_fd_by_id_opts(), to let the caller pass the newly introduced data structure bpf_get_fd_by_id_opts. Keep the existing bpf_link_get_fd_by_id(), and call bpf_link_get_fd_by_id_opts() with NULL as opts argument, to prevent setting open_flags. Currently, the kernel does not support non-zero open_flags for bpf_link_get_fd_by_id_opts(), and a call with them will result in an error returned by the bpf() system call. The caller should always pass zero open_flags. 
Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221006110736.84253-6-roberto.sassu@huaweicloud.com --- tools/lib/bpf/bpf.c | 12 +++++++++++- tools/lib/bpf/bpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index b95fed0c1644..9aff98f42a3d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1004,19 +1004,29 @@ int bpf_btf_get_fd_by_id(__u32 id) return bpf_btf_get_fd_by_id_opts(id, NULL); } -int bpf_link_get_fd_by_id(__u32 id) +int bpf_link_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); attr.link_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_link_get_fd_by_id(__u32 id) +{ + return bpf_link_get_fd_by_id_opts(id, NULL); +} + int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) { const size_t attr_sz = offsetofend(union bpf_attr, info); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 4b487305eeb8..a112e0ed1b19 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -383,6 +383,8 @@ LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); struct bpf_prog_query_opts { diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7011d5eec67b..71bf5691a689 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -372,6 +372,7 @@ LIBBPF_1.0.0 { LIBBPF_1.1.0 { global: bpf_btf_get_fd_by_id_opts; + bpf_link_get_fd_by_id_opts; bpf_map_get_fd_by_id_opts; bpf_prog_get_fd_by_id_opts; user_ring_buffer__discard; -- cgit v1.2.3-58-ga151 From a9c7c18b57594c72a63fad749021b743c65a098b Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 6 Oct 2022 13:07:36 +0200 Subject: selftests/bpf: Add tests for _opts variants of bpf_*_get_fd_by_id() Introduce the data_input map, write-protected with a small eBPF program implementing the lsm/bpf_map hook. Then, ensure that bpf_map_get_fd_by_id() and bpf_map_get_fd_by_id_opts() with NULL opts don't succeed due to requesting read-write access to the write-protected map. Also, ensure that bpf_map_get_fd_by_id_opts() with open_flags in opts set to BPF_F_RDONLY instead succeeds. After obtaining a read-only fd, ensure that only map lookup succeeds and not update. Ensure that update works only with the read-write fd obtained at program loading time, when the write protection was not yet enabled. Finally, ensure that the other _opts variants of bpf_*_get_fd_by_id() don't work if the BPF_F_RDONLY flag is set in opts (due to the kernel not handling the open_flags member of bpf_attr). 
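In shorthand, the access matrix the test asserts (a paraphrase of the
description above, not test output):

	/* data_input map is write-protected by the lsm/bpf_map program:
	 *
	 *   bpf_map_get_fd_by_id(id)                  -> fails (read-write)
	 *   bpf_map_get_fd_by_id_opts(id, NULL)       -> fails (read-write)
	 *   bpf_map_get_fd_by_id_opts(id, &rdonly)    -> fd >= 0
	 *   bpf_map_lookup_elem(rdonly_fd, ...)       -> ok
	 *   bpf_map_update_elem(rdonly_fd, ...)       -> fails (read-only fd)
	 *   bpf_map_update_elem(rw_fd_from_load, ...) -> ok
	 *   prog/link/btf _opts calls w/ BPF_F_RDONLY -> -EINVAL (kernel does
	 *   not handle open_flags for these object types yet)
	 */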
Signed-off-by: Roberto Sassu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221006110736.84253-7-roberto.sassu@huaweicloud.com --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../bpf/prog_tests/libbpf_get_fd_by_id_opts.c | 87 ++++++++++++++++++++++ .../bpf/progs/test_libbpf_get_fd_by_id_opts.c | 36 +++++++++ 3 files changed, 124 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c create mode 100644 tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 0fb03b8047d5..beef1232a47a 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -76,3 +76,4 @@ lookup_key # JIT does not support calling kernel f verify_pkcs7_sig # JIT does not support calling kernel function (kfunc) kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c new file mode 100644 index 000000000000..25e5dfa9c315 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu + */ + +#include + +#include "test_libbpf_get_fd_by_id_opts.skel.h" + +void test_libbpf_get_fd_by_id_opts(void) +{ + struct test_libbpf_get_fd_by_id_opts *skel; + struct bpf_map_info info_m = {}; + __u32 len = sizeof(info_m), value; + int ret, zero = 0, fd = -1; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, fd_opts_rdonly, + .open_flags = BPF_F_RDONLY, + ); + + skel = test_libbpf_get_fd_by_id_opts__open_and_load(); + if (!ASSERT_OK_PTR(skel, + "test_libbpf_get_fd_by_id_opts__open_and_load")) + return; + + ret = test_libbpf_get_fd_by_id_opts__attach(skel); + if (!ASSERT_OK(ret, "test_libbpf_get_fd_by_id_opts__attach")) + goto close_prog; + + ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.data_input), + &info_m, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd")) + goto close_prog; + + fd = bpf_map_get_fd_by_id(info_m.id); + if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id")) + goto close_prog; + + fd = bpf_map_get_fd_by_id_opts(info_m.id, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id_opts")) + goto close_prog; + + fd = bpf_map_get_fd_by_id_opts(info_m.id, &fd_opts_rdonly); + if (!ASSERT_GE(fd, 0, "bpf_map_get_fd_by_id_opts")) + goto close_prog; + + /* Map lookup should work with read-only fd. */ + ret = bpf_map_lookup_elem(fd, &zero, &value); + if (!ASSERT_OK(ret, "bpf_map_lookup_elem")) + goto close_prog; + + if (!ASSERT_EQ(value, 0, "map value mismatch")) + goto close_prog; + + /* Map update should not work with read-only fd. */ + ret = bpf_map_update_elem(fd, &zero, &len, BPF_ANY); + if (!ASSERT_LT(ret, 0, "bpf_map_update_elem")) + goto close_prog; + + /* Map update should work with read-write fd. */ + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.data_input), &zero, + &len, BPF_ANY); + if (!ASSERT_OK(ret, "bpf_map_update_elem")) + goto close_prog; + + /* Prog get fd with opts set should not work (no kernel support). 
*/ + ret = bpf_prog_get_fd_by_id_opts(0, &fd_opts_rdonly); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_prog_get_fd_by_id_opts")) + goto close_prog; + + /* Link get fd with opts set should not work (no kernel support). */ + ret = bpf_link_get_fd_by_id_opts(0, &fd_opts_rdonly); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_link_get_fd_by_id_opts")) + goto close_prog; + + /* BTF get fd with opts set should not work (no kernel support). */ + ret = bpf_btf_get_fd_by_id_opts(0, &fd_opts_rdonly); + ASSERT_EQ(ret, -EINVAL, "bpf_btf_get_fd_by_id_opts"); + +close_prog: + if (fd >= 0) + close(fd); + + test_libbpf_get_fd_by_id_opts__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c new file mode 100644 index 000000000000..f5ac5f3e8919 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH + * + * Author: Roberto Sassu + */ + +#include "vmlinux.h" +#include +#include +#include + +/* From include/linux/mm.h. */ +#define FMODE_WRITE 0x2 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} data_input SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("lsm/bpf_map") +int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) +{ + if (map != (struct bpf_map *)&data_input) + return 0; + + if (fmode & FMODE_WRITE) + return -EACCES; + + return 0; +} -- cgit v1.2.3-58-ga151 From d31ada3b511141f4b78cae5a05cc2dad887c40b7 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 11 Oct 2022 11:52:55 -0500 Subject: selftests/bpf: Alphabetize DENYLISTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DENYLIST and DENYLIST.s390x files are used to specify testcases which should not be run on CI. Currently, testcases are appended to the end of these files as needed. This can make it a pain to resolve merge conflicts. This patch alphabetizes the DENYLIST files to ease this burden. Signed-off-by: David Vernet Acked-by: Daniel Müller Link: https://lore.kernel.org/r/20221011165255.774014-1-void@manifault.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/DENYLIST | 3 ++- tools/testing/selftests/bpf/DENYLIST.s390x | 41 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index 939de574fc7f..f748f2c33b22 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,6 +1,7 @@ # TEMPORARY +# Alphabetical order get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge -stacktrace_build_id_nmi stacktrace_build_id +stacktrace_build_id_nmi task_fd_query_rawtp varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index beef1232a47a..520f12229b98 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,13 +1,18 @@ # TEMPORARY +# Alphabetical order atomics # attach(add): actual -524 <= expected 0 (trampoline) -bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) 
-bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) bpf_loop # attaches to __x64_sys_nanosleep bpf_mod_race # BPF trampoline bpf_nf # JIT does not support calling kernel function +bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +cb_refs # expected error message unexpected error: -524 (trampoline) +cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) fentry_fexit # fentry attach failed: -524 (trampoline) fentry_test # fentry_first_attach unexpected error: -524 (trampoline) @@ -18,19 +23,28 @@ fexit_test # fexit_first_attach unexpected error: get_func_args_test # trampoline get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) kfree_skb # attach fentry unexpected error: -524 (trampoline) kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) +kprobe_multi_test # relies on fentry ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +lookup_key # JIT does not support calling kernel function (kfunc) +lru_bug # prog 'printk': failed to auto-attach: -524 +map_kptr # failed to open_and_load program: -524 (trampoline) modify_return # modify_return attach failed: -524 (trampoline) module_attach # skel_attach skeleton attach failed: -524 (trampoline) mptcp -kprobe_multi_test # relies on fentry netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) probe_user # check_kprobe_res wrong kprobe res from probe read (?) recursion # skel_attach unexpected error: -524 (trampoline) ringbuf # skel_load skeleton load failed (?) +select_reuseport # intermittently fails on new s390x setup +send_signal # intermittently fails to receive signal +setget_sockopt # attach unexpected error: -524 (trampoline) sk_assign # Can't read on server: Invalid argument (?) sk_lookup # endianness problem sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) @@ -52,28 +66,15 @@ timer_mim # failed to auto-attach program 'test1' trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +tracing_struct # failed to auto-attach: -524 (trampoline) trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +unpriv_bpf_disabled # fentry +user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) 
+verify_pkcs7_sig # JIT does not support calling kernel function (kfunc)
 vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline)
 xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?)
 xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
 xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
-map_kptr # failed to open_and_load program: -524 (trampoline)
-bpf_cookie # failed to open_and_load program: -524 (trampoline)
 xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
-send_signal # intermittently fails to receive signal
-select_reuseport # intermittently fails on new s390x setup
 xdp_synproxy # JIT does not support calling kernel function (kfunc)
-unpriv_bpf_disabled # fentry
-lru_bug # prog 'printk': failed to auto-attach: -524
-setget_sockopt # attach unexpected error: -524 (trampoline)
-cb_refs # expected error message unexpected error: -524 (trampoline)
-cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc)
-htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
-tracing_struct # failed to auto-attach: -524 (trampoline)
-user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?)
-lookup_key # JIT does not support calling kernel function (kfunc)
-verify_pkcs7_sig # JIT does not support calling kernel function (kfunc)
-kfunc_dynptr_param # JIT does not support calling kernel function (kfunc)
-deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
-libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
-- cgit v1.2.3-58-ga151


From de9c8d848d90cf2e53aced50b350827442ca5a4f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Wed, 12 Oct 2022 15:12:35 -0700
Subject: selftests/bpf: S/iptables/iptables-legacy/ in the bpf_nf and xdp_synproxy test

The recent vm image in CI has reported errors in selftests that use the
iptables command. Manu Bretelle has pointed out that in the recent vm
image, iptables is sym-linked to iptables-nft. With this knowledge, I
can also reproduce the CI error by manually running the tests with
'iptables-nft'.

This patch replaces the iptables command with iptables-legacy to unblock
the CI tests.
Signed-off-by: Martin KaFai Lau
Signed-off-by: Andrii Nakryiko
Acked-by: David Vernet
Link: https://lore.kernel.org/bpf/20221012221235.3529719-1-martin.lau@linux.dev
---
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c       | 6 +++---
 tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 8a838ea8bdf3..c8ba4009e4ab 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -49,14 +49,14 @@ out:
 
 static void test_bpf_nf_ct(int mode)
 {
-	const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
+	const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
 	int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
 	struct sockaddr_in peer_addr = {};
 	struct test_bpf_nf *skel;
 	int prog_fd, err;
 	socklen_t len;
 	u16 srv_port;
-	char cmd[64];
+	char cmd[128];
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
@@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode)
 
 	/* Enable connection tracking */
 	snprintf(cmd, sizeof(cmd), iptables, "-A");
-	if (!ASSERT_OK(system(cmd), "iptables"))
+	if (!ASSERT_OK(system(cmd), cmd))
 		goto end;
 
 	srv_port = (mode == TEST_XDP) ? 5005 : 5006;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index 75550a40e029..c72083885b6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -94,12 +94,12 @@ static void test_synproxy(bool xdp)
 	SYS("sysctl -w net.ipv4.tcp_syncookies=2");
 	SYS("sysctl -w net.ipv4.tcp_timestamps=1");
 	SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
-	SYS("iptables -t raw -I PREROUTING \
+	SYS("iptables-legacy -t raw -I PREROUTING \
 	    -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
-	SYS("iptables -t filter -A INPUT \
+	SYS("iptables-legacy -t filter -A INPUT \
 	    -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
 	    -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
-	SYS("iptables -t filter -A INPUT \
+	SYS("iptables-legacy -t filter -A INPUT \
 	    -i tmp1 -m state --state INVALID -j DROP");
 
 	ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
-- cgit v1.2.3-58-ga151


From 93c660ca40b5d2f7c1b1626e955a8e9fa30e0749 Mon Sep 17 00:00:00 2001
From: Xu Kuohai
Date: Tue, 11 Oct 2022 08:01:03 -0400
Subject: libbpf: Fix use-after-free in btf_dump_name_dups

ASAN reports a use-after-free in btf_dump_name_dups:

ERROR: AddressSanitizer: heap-use-after-free on address 0xffff927006db at pc 0xaaaab5dfb618 bp 0xffffdd89b890 sp 0xffffdd89b928
READ of size 2 at 0xffff927006db thread T0
    #0 0xaaaab5dfb614 in __interceptor_strcmp.part.0 (test_progs+0x21b614)
    #1 0xaaaab635f144 in str_equal_fn tools/lib/bpf/btf_dump.c:127
    #2 0xaaaab635e3e0 in hashmap_find_entry tools/lib/bpf/hashmap.c:143
    #3 0xaaaab635e72c in hashmap__find tools/lib/bpf/hashmap.c:212
    #4 0xaaaab6362258 in btf_dump_name_dups tools/lib/bpf/btf_dump.c:1525
    #5 0xaaaab636240c in btf_dump_resolve_name tools/lib/bpf/btf_dump.c:1552
    #6 0xaaaab6362598 in btf_dump_type_name tools/lib/bpf/btf_dump.c:1567
    #7 0xaaaab6360b48 in btf_dump_emit_struct_def tools/lib/bpf/btf_dump.c:912
    #8 0xaaaab6360630 in btf_dump_emit_type tools/lib/bpf/btf_dump.c:798
    #9 0xaaaab635f720 in btf_dump__dump_type tools/lib/bpf/btf_dump.c:282
    #10 0xaaaab608523c in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:236
    #11 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875
    #12 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062
    #13 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697
    #14 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308
    #15 0xaaaab5d65990 (test_progs+0x185990)

0xffff927006db is located 11 bytes inside of 16-byte region [0xffff927006d0,0xffff927006e0)
freed by thread T0 here:
    #0 0xaaaab5e2c7c4 in realloc (test_progs+0x24c7c4)
    #1 0xaaaab634f4a0 in libbpf_reallocarray tools/lib/bpf/libbpf_internal.h:191
    #2 0xaaaab634f840 in libbpf_add_mem tools/lib/bpf/btf.c:163
    #3 0xaaaab636643c in strset_add_str_mem tools/lib/bpf/strset.c:106
    #4 0xaaaab6366560 in strset__add_str tools/lib/bpf/strset.c:157
    #5 0xaaaab6352d70 in btf__add_str tools/lib/bpf/btf.c:1519
    #6 0xaaaab6353e10 in btf__add_field tools/lib/bpf/btf.c:2032
    #7 0xaaaab6084fcc in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:232
    #8 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875
    #9 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062
    #10 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697
    #11 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308
    #12 0xaaaab5d65990 (test_progs+0x185990)

previously allocated by thread T0 here:
    #0 0xaaaab5e2c7c4 in realloc (test_progs+0x24c7c4)
    #1 0xaaaab634f4a0 in libbpf_reallocarray tools/lib/bpf/libbpf_internal.h:191
    #2 0xaaaab634f840 in libbpf_add_mem tools/lib/bpf/btf.c:163
    #3 0xaaaab636643c in strset_add_str_mem tools/lib/bpf/strset.c:106
    #4 0xaaaab6366560 in strset__add_str tools/lib/bpf/strset.c:157
    #5 0xaaaab6352d70 in btf__add_str tools/lib/bpf/btf.c:1519
    #6 0xaaaab6353ff0 in btf_add_enum_common tools/lib/bpf/btf.c:2070
    #7 0xaaaab6354080 in btf__add_enum tools/lib/bpf/btf.c:2102
    #8 0xaaaab6082f50 in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:162
    #9 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875
    #10 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062
    #11 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697
    #12 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308
    #13 0xaaaab5d65990 (test_progs+0x185990)

The reason is that the key stored in the hash table name_map is a string
address, and that string's memory is managed by realloc(): when it is
resized by a later realloc() call, the old block may be freed, so the
address stored in name_map then references freed memory, causing the
use-after-free. Fix it by storing a duplicated copy of the string in
name_map.
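The failure mode in miniature (an illustrative sketch only; libbpf's
internal hashmap stores the key pointer verbatim):

	char *buf = malloc(16);                  /* string storage */

	strcpy(buf, "type_name");
	hashmap__set(map, buf, (void *)1, NULL, NULL); /* key aliases buf */
	buf = realloc(buf, 4096);                /* block may move; old one freed */
	/* the key kept inside the map now dangles; the next lookup that
	 * strcmp()s against it is the use-after-free reported above
	 */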
Fixes: 919d2b1dbb07 ("libbpf: Allow modification of BTF and add btf__add_str API") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20221011120108.782373-2-xukuohai@huaweicloud.com --- tools/lib/bpf/btf_dump.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e4da6de68d8f..bf0cc0e986dd 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -219,6 +219,17 @@ static int btf_dump_resize(struct btf_dump *d) return 0; } +static void btf_dump_free_names(struct hashmap *map) +{ + size_t bkt; + struct hashmap_entry *cur; + + hashmap__for_each_entry(map, cur, bkt) + free((void *)cur->key); + + hashmap__free(map); +} + void btf_dump__free(struct btf_dump *d) { int i; @@ -237,8 +248,8 @@ void btf_dump__free(struct btf_dump *d) free(d->cached_names); free(d->emit_queue); free(d->decl_stack); - hashmap__free(d->type_names); - hashmap__free(d->ident_names); + btf_dump_free_names(d->type_names); + btf_dump_free_names(d->ident_names); free(d); } @@ -1524,11 +1535,23 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, const char *orig_name) { + char *old_name, *new_name; size_t dup_cnt = 0; + int err; + + new_name = strdup(orig_name); + if (!new_name) + return 1; hashmap__find(name_map, orig_name, (void **)&dup_cnt); dup_cnt++; - hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); + + err = hashmap__set(name_map, new_name, (void *)dup_cnt, + (const void **)&old_name, NULL); + if (err) + free(new_name); + + free(old_name); return dup_cnt; } -- cgit v1.2.3-58-ga151 From 0dc9254e03704c75f2ebc9cbef2ce4de83fba603 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 11 Oct 2022 08:01:04 -0400 Subject: libbpf: Fix memory leak in parse_usdt_arg() In the arm64 version of parse_usdt_arg(), when sscanf returns 2, reg_name is allocated but not freed. Fix it. 
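As background, the %m modifier (a POSIX extension) makes sscanf()
allocate the destination buffer with malloc(), and ownership passes to
the caller. A standalone illustration of that contract (not the libbpf
code itself):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		char *reg = NULL;
		int sz;
		long off;

		/* mirrors the "-4@[sp, 96]" memory-dereference spec format */
		if (sscanf("-4@[sp, 96]", " %d @ [ %m[a-z0-9], %ld ]",
			   &sz, &reg, &off) == 3) {
			printf("sz=%d reg=%s off=%ld\n", sz, reg, off);
			free(reg); /* omitting this is exactly the leak fixed here */
		}
		return 0;
	}

The fix below sidesteps the ownership question entirely by scanning into
a fixed-size buffer (%15[...]) instead.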
Fixes: 0f8619929c57 ("libbpf: Usdt aarch64 arg parsing support")
Signed-off-by: Xu Kuohai
Signed-off-by: Andrii Nakryiko
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20221011120108.782373-3-xukuohai@huaweicloud.com
---
 tools/lib/bpf/usdt.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index e83b497c2245..49f3c3b7f609 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -1348,25 +1348,23 @@ static int calc_pt_regs_off(const char *reg_name)
 
 static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
 {
-	char *reg_name = NULL;
+	char reg_name[16];
 	int arg_sz, len, reg_off;
 	long off;
 
-	if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) {
+	if (sscanf(arg_str, " %d @ \[ %15[a-z0-9], %ld ] %n", &arg_sz, reg_name, &off, &len) == 3) {
 		/* Memory dereference case, e.g., -4@[sp, 96] */
 		arg->arg_type = USDT_ARG_REG_DEREF;
 		arg->val_off = off;
 		reg_off = calc_pt_regs_off(reg_name);
-		free(reg_name);
 		if (reg_off < 0)
 			return reg_off;
 		arg->reg_off = reg_off;
-	} else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) {
+	} else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", &arg_sz, reg_name, &len) == 2) {
 		/* Memory dereference case, e.g., -4@[sp] */
 		arg->arg_type = USDT_ARG_REG_DEREF;
 		arg->val_off = 0;
 		reg_off = calc_pt_regs_off(reg_name);
-		free(reg_name);
 		if (reg_off < 0)
 			return reg_off;
 		arg->reg_off = reg_off;
@@ -1375,12 +1373,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
 		arg->arg_type = USDT_ARG_CONST;
 		arg->val_off = off;
 		arg->reg_off = 0;
-	} else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
+	} else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) {
 		/* Register read case, e.g., -8@x4 */
 		arg->arg_type = USDT_ARG_REG;
 		arg->val_off = 0;
 		reg_off = calc_pt_regs_off(reg_name);
-		free(reg_name);
 		if (reg_off < 0)
 			return reg_off;
 		arg->reg_off = reg_off;
-- cgit v1.2.3-58-ga151


From 6e8280b958c5d7edc514cf347a800b23b7732b2b Mon Sep 17 00:00:00 2001
From: Xu Kuohai
Date: Tue, 11 Oct 2022 08:01:05 -0400
Subject: selftests/bpf: Fix memory leak caused by not destroying skeleton

Some test cases do not destroy the skeleton object correctly, causing
ASAN to report memory leak warnings. Fix it.
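Both fixes follow the usual goto-unwind shape (a sketch; foo is a
hypothetical skeleton name):

	struct foo *skel;

	skel = foo__open_and_load();
	if (!ASSERT_OK_PTR(skel, "foo__open_and_load"))
		return;

	if (!ASSERT_OK(foo__attach(skel), "foo__attach"))
		goto destroy_skel;      /* was: return; -- leaked skel */

	/* ... test body ... */

destroy_skel:
	foo__destroy(skel);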
Fixes: 0ef6740e9777 ("selftests/bpf: Add tests for kptr_ref refcounting") Fixes: 1642a3945e22 ("selftests/bpf: Add struct argument tests with fentry/fexit programs.") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20221011120108.782373-4-xukuohai@huaweicloud.com --- tools/testing/selftests/bpf/prog_tests/map_kptr.c | 3 ++- tools/testing/selftests/bpf/prog_tests/tracing_struct.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index fdcea7a61491..0d66b1524208 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -105,7 +105,7 @@ static void test_map_kptr_success(bool test_run) ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); if (test_run) - return; + goto exit; ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); @@ -132,6 +132,7 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); +exit: map_kptr__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index d5022b91d1e4..48dc9472e160 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -15,7 +15,7 @@ static void test_fentry(void) err = tracing_struct__attach(skel); if (!ASSERT_OK(err, "tracing_struct__attach")) - return; + goto destroy_skel; ASSERT_OK(trigger_module_test_read(256), "trigger_read"); @@ -54,6 +54,7 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret"); tracing_struct__detach(skel); +destroy_skel: tracing_struct__destroy(skel); } -- cgit v1.2.3-58-ga151 From 6d2e21dc4db3933db65293552ecc1ede26febeca Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 11 Oct 2022 08:01:06 -0400 Subject: selftest/bpf: Fix memory leak in kprobe_multi_test The get_syms() function in kprobe_multi_test.c does not free the string memory allocated by sscanf correctly. Fix it. 
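In miniature, the ownership discipline the fix introduces: name is
cleared once the array takes over the string, so the shared error path
can free it unconditionally (note the old error loop also freed
syms[cnt] instead of syms[i]):

	syms[cnt++] = name;
	name = NULL;            /* the array owns the string from here on */
	...
error:
	free(name);             /* at most the one string not yet handed over */
	for (i = 0; i < cnt; i++)
		free(syms[i]);  /* each stored string exactly once */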
Fixes: 5b6c7e5c4434 ("selftests/bpf: Add attach bench test")
Signed-off-by: Xu Kuohai
Signed-off-by: Andrii Nakryiko
Acked-by: Jiri Olsa
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20221011120108.782373-5-xukuohai@huaweicloud.com
---
 .../selftests/bpf/prog_tests/kprobe_multi_test.c | 26 ++++++++++++----------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index d457a55ff408..287b3ac40227 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -325,7 +325,7 @@ static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_u
 static int get_syms(char ***symsp, size_t *cntp)
 {
 	size_t cap = 0, cnt = 0, i;
-	char *name, **syms = NULL;
+	char *name = NULL, **syms = NULL;
 	struct hashmap *map;
 	char buf[256];
 	FILE *f;
@@ -352,6 +352,8 @@ static int get_syms(char ***symsp, size_t *cntp)
 		/* skip modules */
 		if (strchr(buf, '['))
 			continue;
+
+		free(name);
 		if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
 			continue;
 		/*
@@ -369,32 +371,32 @@ static int get_syms(char ***symsp, size_t *cntp)
 		if (!strncmp(name, "__ftrace_invalid_address__",
 			     sizeof("__ftrace_invalid_address__") - 1))
 			continue;
+
 		err = hashmap__add(map, name, NULL);
-		if (err) {
-			free(name);
-			if (err == -EEXIST)
-				continue;
+		if (err == -EEXIST)
+			continue;
+		if (err)
 			goto error;
-		}
+
 		err = libbpf_ensure_mem((void **) &syms, &cap, sizeof(*syms), cnt + 1);
-		if (err) {
-			free(name);
+		if (err)
 			goto error;
-		}
-		syms[cnt] = name;
-		cnt++;
+
+		syms[cnt++] = name;
+		name = NULL;
 	}
 	*symsp = syms;
 	*cntp = cnt;
 
 error:
+	free(name);
 	fclose(f);
 	hashmap__free(map);
 	if (err) {
 		for (i = 0; i < cnt; i++)
-			free(syms[cnt]);
+			free(syms[i]);
 		free(syms);
 	}
 	return err;
-- cgit v1.2.3-58-ga151


From 4abdb1d5b250df4b5b3afd394d5e2fa516064c04 Mon Sep 17 00:00:00 2001
From: Xu Kuohai
Date: Tue, 11 Oct 2022 08:01:06 -0400
Subject: selftests/bpf: Fix error failure of case test_xdp_adjust_tail_grow

test_xdp_adjust_tail_grow failed with ipv6:
  test_xdp_adjust_tail_grow:FAIL:ipv6 unexpected error: -28 (errno 28)

The reason is that this test case tests ipv4 before ipv6, and when the
ipv4 test finished, topts.data_size_out was set to 54, which is smaller
than the ipv6 output data size of 114, so the ipv6 test fails with an
ENOSPC error.

Fix it by resetting topts.data_size_out to sizeof(buf) before testing
ipv6.
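For context, data_size_out in bpf_test_run_opts is an in/out field: on
input it is the capacity of the data_out buffer, and the kernel
overwrites it with the size of the produced packet (failing with
-ENOSPC when the output does not fit). A sketch of the reuse pattern
the fix establishes:

	topts.data_in = &pkt_v4;
	topts.data_size_in = sizeof(pkt_v4);
	topts.data_size_out = sizeof(buf);  /* capacity in, actual size out */
	err = bpf_prog_test_run_opts(prog_fd, &topts);
	/* topts.data_size_out now holds the ipv4 output size (54 here) */

	topts.data_in = &pkt_v6;
	topts.data_size_in = sizeof(pkt_v6);
	topts.data_size_out = sizeof(buf);  /* must be reset before reuse */
	err = bpf_prog_test_run_opts(prog_fd, &topts);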
Fixes: 04fcb5f9a104 ("selftests/bpf: Migrate from bpf_prog_test_run")
Signed-off-by: Xu Kuohai
Signed-off-by: Andrii Nakryiko
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20221011120108.782373-6-xukuohai@huaweicloud.com
---
 tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 9b9cf8458adf..009ee37607df 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -63,6 +63,7 @@ static void test_xdp_adjust_tail_grow(void)
 	expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
 	topts.data_in = &pkt_v6;
 	topts.data_size_in = sizeof(pkt_v6);
+	topts.data_size_out = sizeof(buf);
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "ipv6");
 	ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
-- cgit v1.2.3-58-ga151


From cbc1c998da59687e8bbc4667154a72eead2daf2d Mon Sep 17 00:00:00 2001
From: Xu Kuohai
Date: Tue, 11 Oct 2022 08:01:08 -0400
Subject: selftest/bpf: Fix error usage of ASSERT_OK in xdp_adjust_tail.c

xdp_adjust_tail.c calls ASSERT_OK() to check the return value of
bpf_prog_test_load(), but the condition is not correct. Fix it.

Fixes: 791cad025051 ("bpf: selftests: Get rid of CHECK macro in xdp_adjust_tail.c")
Signed-off-by: Xu Kuohai
Signed-off-by: Andrii Nakryiko
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20221011120108.782373-7-xukuohai@huaweicloud.com
---
 tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 009ee37607df..39973ea1ce43 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -18,7 +18,7 @@ static void test_xdp_adjust_tail_shrink(void)
 	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
+	if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
 		return;
 
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -53,7 +53,7 @@ static void test_xdp_adjust_tail_grow(void)
 	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
+	if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
 		return;
 
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -90,7 +90,7 @@ static void test_xdp_adjust_tail_grow2(void)
 	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
+	if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
 		return;
 
 	/* Test case-64 */
-- cgit v1.2.3-58-ga151


From 51deedc9b8680953437dfe359e5268120de10e30 Mon Sep 17 00:00:00 2001
From: Shung-Hsi Yu
Date: Wed, 12 Oct 2022 10:23:51 +0800
Subject: libbpf: Use elf_getshdrnum() instead of e_shnum

This commit replaces e_shnum with the elf_getshdrnum() helper to fix two
oss-fuzz-reported heap-buffer overflows in __bpf_object__open. Both
reports are incorrectly marked as fixed while still being reproducible
with the latest libbpf.
# clusterfuzz-testcase-minimized-bpf-object-fuzzer-5747922482888704
libbpf: loading object 'fuzz-object' from buffer
libbpf: sec_cnt is 0
libbpf: elf: section(1) .data, size 0, link 538976288, flags 2020202020202020, type=2
libbpf: elf: section(2) .data, size 32, link 538976288, flags 202020202020ff20, type=1
=================================================================
==13==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6020000000c0 at pc 0x0000005a7b46 bp 0x7ffd12214af0 sp 0x7ffd12214ae8
WRITE of size 4 at 0x6020000000c0 thread T0
    #0 0x5a7b45 in bpf_object__elf_collect /src/libbpf/src/libbpf.c:3414:24
    #1 0x5733c0 in bpf_object_open /src/libbpf/src/libbpf.c:7223:16
    #2 0x5739fd in bpf_object__open_mem /src/libbpf/src/libbpf.c:7263:20
    ...

The issue lies in libbpf's direct use of the e_shnum field in the ELF
header as the section header count. libelf, on the other hand,
implements extra logic that, when e_shnum == 0 && e_shoff != 0, uses the
sh_size member of the initial section header as the real section header
count (part of the ELF spec, to accommodate the case where the section
header count is larger than SHN_LORESERVE).

The above inconsistency leads to libbpf writing into a zero-entry
calloc area. So instead of using e_shnum directly, use the
elf_getshdrnum() helper provided by libelf to retrieve the section
header count into sec_cnt.

Fixes: 0d6988e16a12 ("libbpf: Fix section counting logic")
Fixes: 25bbbd7a444b ("libbpf: Remove assumptions about uniqueness of .rodata/.data/.bss maps")
Signed-off-by: Shung-Hsi Yu
Signed-off-by: Andrii Nakryiko
Link: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=40868
Link: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=40957
Link: https://lore.kernel.org/bpf/20221012022353.7350-2-shung-hsi.yu@suse.com
---
 tools/lib/bpf/libbpf.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 184ce1684dcd..2e8ac13de6a0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -597,7 +597,7 @@ struct elf_state {
 	size_t shstrndx; /* section index for section name strings */
 	size_t strtabidx;
 	struct elf_sec_desc *secs;
-	int sec_cnt;
+	size_t sec_cnt;
 	int btf_maps_shndx;
 	__u32 btf_maps_sec_btf_id;
 	int text_shndx;
@@ -3312,10 +3312,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 	Elf64_Shdr *sh;
 
 	/* ELF section indices are 0-based, but sec #0 is special "invalid"
-	 * section. e_shnum does include sec #0, so e_shnum is the necessary
-	 * size of an array to keep all the sections.
+	 * section. Since section count retrieved by elf_getshdrnum() does
+	 * include sec #0, it is already the necessary size of an array to keep
+	 * all the sections.
 	 */
-	obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;
+	if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
+		pr_warn("elf: failed to get the number of sections for %s: %s\n",
+			obj->path, elf_errmsg(-1));
+		return -LIBBPF_ERRNO__FORMAT;
+	}
 	obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
 	if (!obj->efile.secs)
 		return -ENOMEM;
-- cgit v1.2.3-58-ga151


From 35a855509e6ee3442477c8ebc6827b5b5d32a7b5 Mon Sep 17 00:00:00 2001
From: Shung-Hsi Yu
Date: Wed, 12 Oct 2022 10:23:52 +0800
Subject: libbpf: Deal with section with no data gracefully

The ELF section data pointer returned by libelf may be NULL (if the
section has type SHT_NOBITS), so null-check the section data pointer
before attempting to copy the license and kversion sections.
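For context, SHT_NOBITS sections (classically .bss) occupy no bytes in
the file, and libelf hands them back with a NULL buffer pointer. A
defensive-read sketch (error handling condensed; sec_bytes is a
hypothetical helper):

	#include <libelf.h>

	static const void *sec_bytes(Elf_Scn *scn, size_t *size)
	{
		Elf_Data *data = elf_getdata(scn, NULL);

		if (!data || !data->d_buf) /* SHT_NOBITS or malformed input */
			return NULL;
		*size = data->d_size;
		return data->d_buf;
	}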
Fixes: cb1e5e961991 ("bpf tools: Collect version and license from ELF sections")
Signed-off-by: Shung-Hsi Yu
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20221012022353.7350-3-shung-hsi.yu@suse.com
---
 tools/lib/bpf/libbpf.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2e8ac13de6a0..29e9df0c232b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1408,6 +1408,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
 static int
 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
 {
+	if (!data) {
+		pr_warn("invalid license section in %s\n", obj->path);
+		return -LIBBPF_ERRNO__FORMAT;
+	}
 	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
 	 * go over allowed ELF data section buffer
 	 */
@@ -1421,7 +1425,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
 {
 	__u32 kver;
 
-	if (size != sizeof(kver)) {
+	if (!data || size != sizeof(kver)) {
 		pr_warn("invalid kver section in %s\n", obj->path);
 		return -LIBBPF_ERRNO__FORMAT;
 	}
-- cgit v1.2.3-58-ga151


From d0d382f95a9270dcf803539d6781d6bd67e3f5b2 Mon Sep 17 00:00:00 2001
From: Shung-Hsi Yu
Date: Wed, 12 Oct 2022 10:23:53 +0800
Subject: libbpf: Fix null-pointer dereference in find_prog_by_sec_insn()

When there are no program sections, obj->programs is left unallocated,
and find_prog_by_sec_insn()'s search lands on &obj->programs[0] == NULL,
which will cause a null-pointer dereference in the following access to
prog->sec_idx. Guard the search with obj->nr_programs, similar to what
is done in __bpf_program__iter(), to prevent the null-pointer access
from happening.

Fixes: db2b8b06423c ("libbpf: Support CO-RE relocations for multi-prog sections")
Signed-off-by: Shung-Hsi Yu
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20221012022353.7350-4-shung-hsi.yu@suse.com
---
 tools/lib/bpf/libbpf.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 29e9df0c232b..8c3f236c86e4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4115,6 +4115,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
 	int l = 0, r = obj->nr_programs - 1, m;
 	struct bpf_program *prog;
 
+	if (!obj->nr_programs)
+		return NULL;
+
 	while (l < r) {
 		m = l + (r - l + 1) / 2;
 		prog = &obj->programs[m];
-- cgit v1.2.3-58-ga151


From 62c69e89e81bfbdb9a87ae3e0599dcc6aacf786b Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Tue, 11 Oct 2022 15:12:49 +0800
Subject: selftests/bpf: Use sys_pidfd_open() helper when possible

SYS_pidfd_open may be undefined for old glibc, so use the
sys_pidfd_open() helper defined in task_local_storage_helpers.h instead
to fix a potential build failure.

And according to commit 7615d9e1780e ("arch: wire-up pidfd_open()"), the
syscall number of pidfd_open is always 434 except on the alpha
architecture, so update the definition of __NR_pidfd_open accordingly.
Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221011071249.3471760-1-houtao@huaweicloud.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 10 +++------- tools/testing/selftests/bpf/task_local_storage_helpers.h | 4 ++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index ecde236047fe..c39d40f4b268 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "bpf_iter_ipv6_route.skel.h" #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" @@ -175,11 +176,6 @@ static void test_bpf_map(void) bpf_iter_bpf_map__destroy(skel); } -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(SYS_pidfd_open, pid, flags); -} - static void check_bpf_link_info(const struct bpf_program *prog) { LIBBPF_OPTS(bpf_iter_attach_opts, opts); @@ -295,8 +291,8 @@ static void test_task_pidfd(void) union bpf_iter_link_info linfo; int pidfd; - pidfd = pidfd_open(getpid(), 0); - if (!ASSERT_GT(pidfd, 0, "pidfd_open")) + pidfd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_GT(pidfd, 0, "sys_pidfd_open")) return; memset(&linfo, 0, sizeof(linfo)); diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h index 711d5abb7d51..281f86132766 100644 --- a/tools/testing/selftests/bpf/task_local_storage_helpers.h +++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h @@ -7,8 +7,12 @@ #include #ifndef __NR_pidfd_open +#ifdef __alpha__ +#define __NR_pidfd_open 544 +#else #define __NR_pidfd_open 434 #endif +#endif static inline int sys_pidfd_open(pid_t pid, unsigned int flags) { -- cgit v1.2.3-58-ga151 From 6c4e777fbba6e7dd6a0757c0e7bba66cdbe611cd Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 17 Oct 2022 23:24:58 +0000 Subject: bpf/docs: Update README for most recent vmtest.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 40b09653b197 ("selftests/bpf: Adjust vmtest.sh to use local kernel configuration") the vmtest.sh script no longer downloads a kernel configuration but uses the local, in-repository one. This change updates the README, which still mentions the old behavior. Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221017232458.1272762-1-deso@posteo.net --- tools/testing/selftests/bpf/README.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index d3c6b3da0bb1..822548d0f2ae 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -14,10 +14,11 @@ It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtes The script tries to ensure that the tests are run with the same environment as they would be run post-submit in the CI used by the Maintainers. -This script downloads a suitable Kconfig and VM userspace image from the system used by -the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles the -bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and -saves the resulting output (by default in ``~/.bpf_selftests``). 
+This script uses the in-tree kernel configuration and downloads a VM userspace +image from the system used by the CI. It builds the kernel (without overwriting +your existing Kconfig), recompiles the bpf selftests, runs them (by default +``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by +default in ``~/.bpf_selftests``). Script dependencies: - clang (preferably built from sources, https://github.com/llvm/llvm-project); @@ -26,7 +27,7 @@ Script dependencies: - docutils (for ``rst2man``); - libcap-devel. -For more information on about using the script, run: +For more information about using the script, run: .. code-block:: console -- cgit v1.2.3-58-ga151 From 6fb1faa1b92b19e3b1fad70a800c87bee934d67b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Oct 2022 09:39:58 +0300 Subject: selftests: bridge_vlan_mcast: Delete qdiscs during cleanup The qdiscs are added during setup, but not deleted during cleanup, resulting in the following error messages: # ./bridge_vlan_mcast.sh [...] # ./bridge_vlan_mcast.sh Error: Exclusivity flag on, cannot modify. Error: Exclusivity flag on, cannot modify. Solve by deleting the qdiscs during cleanup. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index 8748d1b1d95b..72dfbeaf56b9 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -59,6 +59,9 @@ switch_create() switch_destroy() { + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp1 down -- cgit v1.2.3-58-ga151 From b526b2ea1454d03b294cd10aa35695d5232b1106 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Oct 2022 09:39:59 +0300 Subject: selftests: bridge_igmp: Remove unnecessary address deletion The test group address is added and removed in v2reportleave_test(). There is no need to delete it again during cleanup as it results in the following error message: # bash -x ./bridge_igmp.sh [...] + cleanup + pre_cleanup [...] + ip address del dev swp4 239.10.10.10/32 RTNETLINK answers: Cannot assign requested address + h2_destroy Solve by removing the unnecessary address deletion. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/bridge_igmp.sh | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 1162836f8f32..2aa66d2a1702 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -96,9 +96,6 @@ cleanup() switch_destroy - # Always cleanup the mcast group - ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null - h2_destroy h1_destroy -- cgit v1.2.3-58-ga151 From 81bfcc3fcd2f99578eebc2f64248253b57fc5c76 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Tue, 18 Oct 2022 16:40:15 +0000 Subject: bpf/docs: Summarize CI system and deny lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a brief summary of the BPF continuous integration (CI) to the BPF selftest documentation. 
The summary focuses not so much on actual workings of the CI, as it is maintained outside of the repository, but aims to document the few bits of it that are sourced from this repository and that developers may want to adjust as part of patch submissions: the BPF kernel configuration and the deny list file(s). Changelog: - v1->v2: - use s390x instead of s390 for consistency Signed-off-by: Daniel Müller Acked-by: David Vernet Link: https://lore.kernel.org/r/20221018164015.1970862-1-deso@posteo.net Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/README.rst | 42 +++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 822548d0f2ae..cb9b95702ac6 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -6,13 +6,53 @@ General instructions on running selftests can be found in __ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests +============= +BPF CI System +============= + +BPF employs a continuous integration (CI) system to check patch submission in an +automated fashion. The system runs selftests for each patch in a series. Results +are propagated to patchwork, where failures are highlighted similar to +violations of other checks (such as additional warnings being emitted or a +``scripts/checkpatch.pl`` reported deficiency): + + https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173 + +The CI system executes tests on multiple architectures. It uses a kernel +configuration derived from both the generic and architecture specific config +file fragments below ``tools/testing/selftests/bpf/`` (e.g., ``config`` and +``config.x86_64``). + +Denylisting Tests +================= + +It is possible for some architectures to not have support for all BPF features. +In such a case tests in CI may fail. An example of such a shortcoming is BPF +trampoline support on IBM's s390x architecture. For cases like this, an in-tree +deny list file, located at ``tools/testing/selftests/bpf/DENYLIST.``, can +be used to prevent the test from running on such an architecture. + +In addition to that, the generic ``tools/testing/selftests/bpf/DENYLIST`` is +honored on every architecture running tests. + +These files are organized in three columns. The first column lists the test in +question. This can be the name of a test suite or of an individual test. The +remaining two columns provide additional meta data that helps identify and +classify the entry: column two is a copy and paste of the error being reported +when running the test in the setting in question. The third column, if +available, summarizes the underlying problem. A value of ``trampoline``, for +example, indicates that lack of trampoline support is causing the test to fail. +This last entry helps identify tests that can be re-enabled once such support is +added. + ========================= Running Selftests in a VM ========================= It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``. The script tries to ensure that the tests are run with the same environment as they -would be run post-submit in the CI used by the Maintainers. +would be run post-submit in the CI used by the Maintainers, with the exception +that deny lists are not automatically honored. This script uses the in-tree kernel configuration and downloads a VM userspace image from the system used by the CI. 
It builds the kernel (without overwriting -- cgit v1.2.3-58-ga151 From f33f742d567449bad965bf60c0d65f861c1d7101 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Oct 2022 17:28:14 -0700 Subject: libbpf: clean up and refactor BTF fixup step Refactor libbpf's BTF fixup step during BPF object open phase. The only functional change is that we now ignore BTF_VAR_GLOBAL_EXTERN variables during fix up, not just BTF_VAR_STATIC ones, which shouldn't cause any change in behavior as there shouldn't be any extern variable in data sections for valid BPF object anyways. Otherwise it's just collapsing two functions that have no reason to be separate, and switching find_elf_var_offset() helper to return entire symbol pointer, not just its offset. This will be used by next patch to get ELF symbol visibility. While refactoring, also "normalize" debug messages inside btf_fixup_datasec() to follow general libbpf style and print out data section name consistently, where it's available. Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221019002816.359650-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 96 ++++++++++++++++++++++---------------------------- 1 file changed, 42 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8c3f236c86e4..8802e06c5569 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1461,15 +1461,12 @@ static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 return -ENOENT; } -static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) +static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) { Elf_Data *symbols = obj->efile.symbols; const char *sname; size_t si; - if (!name || !off) - return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); @@ -1483,15 +1480,13 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ sname = elf_sym_str(obj, sym->st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); - return -EIO; - } - if (strcmp(name, sname) == 0) { - *off = sym->st_value; - return 0; + return ERR_PTR(-EIO); } + if (strcmp(name, sname) == 0) + return sym; } - return -ENOENT; + return ERR_PTR(-ENOENT); } static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) @@ -2850,57 +2845,63 @@ static int compare_vsi_off(const void *_a, const void *_b) static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, struct btf_type *t) { - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; + __u32 size = 0, i, vars = btf_vlen(t); + const char *sec_name = btf__name_by_offset(btf, t->name_off); struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; + int err; - if (!name) { + if (!sec_name) { pr_debug("No name found in string section for DATASEC kind.\n"); return -ENOENT; } - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables + /* extern-backing datasecs (.ksyms, .kconfig) have their size and + * variable offsets set at the previous step, so we skip any fixups + * for such sections */ if (t->size) goto sort_vars; - ret = find_elf_sec_sz(obj, name, &size); - if (ret || !size) { - pr_debug("Invalid size for section %s: %u bytes\n", 
name, size); + err = find_elf_sec_sz(obj, sec_name, &size); + if (err || !size) { + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", + sec_name, size, err); return -ENOENT; } t->size = size; for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + const struct btf_type *t_var; + struct btf_var *var; + const char *var_name; + Elf64_Sym *sym; + t_var = btf__type_by_id(btf, vsi->type); if (!t_var || !btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); + pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); return -EINVAL; } var = btf_var(t_var); - if (var->linkage == BTF_VAR_STATIC) + if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) continue; - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); + var_name = btf__name_by_offset(btf, t_var->name_off); + if (!var_name) { + pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", + sec_name, i); return -ENOENT; } - ret = find_elf_var_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); + sym = find_elf_var_sym(obj, var_name); + if (IS_ERR(sym)) { + pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", + sec_name, var_name); return -ENOENT; } - vsi->offset = off; + vsi->offset = sym->st_value; } sort_vars: @@ -2908,13 +2909,16 @@ sort_vars: return 0; } -static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +static int bpf_object_fixup_btf(struct bpf_object *obj) { - int err = 0; - __u32 i, n = btf__type_cnt(btf); + int i, n, err = 0; + if (!obj->btf) + return 0; + + n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { - struct btf_type *t = btf_type_by_id(btf, i); + struct btf_type *t = btf_type_by_id(obj->btf, i); /* Loader needs to fix up some of the things compiler * couldn't get its hands on while emitting BTF. This @@ -2922,28 +2926,12 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) * the info from the ELF itself for this purpose. */ if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); + err = btf_fixup_datasec(obj, obj->btf, t); if (err) - break; + return err; } } - return libbpf_err(err); -} - -static int bpf_object__finalize_btf(struct bpf_object *obj) -{ - int err; - - if (!obj->btf) - return 0; - - err = btf_finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - return err; - } - return 0; } @@ -7233,7 +7221,7 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); - err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object_fixup_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); -- cgit v1.2.3-58-ga151 From 4fcac46c7e107a93030d19c6ea7b90540fc80b1b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Oct 2022 17:28:15 -0700 Subject: libbpf: only add BPF_F_MMAPABLE flag for data maps with global vars Teach libbpf to not add BPF_F_MMAPABLE flag unnecessarily for ARRAY maps that are backing data sections, if such data sections don't expose any variables to user-space. Exposed variables are those that have STB_GLOBAL or STB_WEAK ELF binding and correspond to BTF VAR's BTF_VAR_GLOBAL_ALLOCATED linkage. 
The overall idea is that if some data section doesn't have any variable that is exposed through BPF skeleton, then there is no reason to make such BPF array mmapable. Making BPF array mmapable is not a free no-op action, because BPF verifier doesn't allow users to put special objects (such as BPF spin locks, RB tree nodes, linked list nodes, kptrs, etc; anything that has a sensitive internal state that should not be modified arbitrarily from user space) into mmapable arrays, as there is no way to prevent user space from corrupting such sensitive state through direct memory access through memory-mapped region. By making sure that libbpf doesn't add BPF_F_MMAPABLE flag to BPF array maps corresponding to data sections that only have static variables (which are not supposed to be visible to user space according to libbpf and BPF skeleton rules), users now can have spinlocks, kptrs, etc in either default .bss/.data sections or custom .data.* sections (assuming there are no global variables in such sections). The only possible hiccup with this approach is the need to use global variables during BPF static linking, even if it's not intended to be shared with user space through BPF skeleton. To allow such scenarios, extend libbpf's STV_HIDDEN ELF visibility attribute handling to variables. Libbpf is already treating global hidden BPF subprograms as static subprograms and adjusts BTF accordingly to make BPF verifier verify such subprograms as static subprograms with preserving entire BPF verifier state between subprog calls. This patch teaches libbpf to treat global hidden variables as static ones and adjust BTF information accordingly as well. This allows to share variables between multiple object files during static linking, but still keep them internal to BPF program and not get them exposed through BPF skeleton. Note, that if the user has some advanced scenario where they absolutely need BPF_F_MMAPABLE flag on .data/.bss/.rodata BPF array map despite only having static variables, they still can achieve this by forcing it through explicit bpf_map__set_map_flags() API. Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Acked-by: Dave Marchevsky Link: https://lore.kernel.org/r/20221019002816.359650-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 97 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8802e06c5569..027fd9565c16 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1577,7 +1577,38 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) } static int -bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map); +map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); + +/* Internal BPF map is mmap()'able only if at least one of corresponding + * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL + * variable and it's not marked as __hidden (which turns it into, effectively, + * a STATIC variable). 
+ */ +static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) +{ + const struct btf_type *t, *vt; + struct btf_var_secinfo *vsi; + int i, n; + + if (!map->btf_value_type_id) + return false; + + t = btf__type_by_id(obj->btf, map->btf_value_type_id); + if (!btf_is_datasec(t)) + return false; + + vsi = btf_var_secinfos(t); + for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { + vt = btf__type_by_id(obj->btf, vsi->type); + if (!btf_is_var(vt)) + continue; + + if (btf_var(vt)->linkage != BTF_VAR_STATIC) + return true; + } + + return false; +} static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, @@ -1609,7 +1640,12 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG ? BPF_F_RDONLY_PROG : 0; - def->map_flags |= BPF_F_MMAPABLE; + + /* failures are fine because of maps like .rodata.str1.1 */ + (void) map_fill_btf_type_info(obj, map); + + if (map_is_mmapable(obj, map)) + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); @@ -1626,9 +1662,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, return err; } - /* failures are fine because of maps like .rodata.str1.1 */ - (void) bpf_map_find_btf_info(obj, map); - if (data) memcpy(map->mmaped, data, data_sz); @@ -2540,7 +2573,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, fill_map_from_def(map->inner_map, &inner_def); } - err = bpf_map_find_btf_info(obj, map); + err = map_fill_btf_type_info(obj, map); if (err) return err; @@ -2848,6 +2881,7 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, __u32 size = 0, i, vars = btf_vlen(t); const char *sec_name = btf__name_by_offset(btf, t->name_off); struct btf_var_secinfo *vsi; + bool fixup_offsets = false; int err; if (!sec_name) { @@ -2855,21 +2889,34 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, return -ENOENT; } - /* extern-backing datasecs (.ksyms, .kconfig) have their size and - * variable offsets set at the previous step, so we skip any fixups - * for such sections + /* Extern-backing datasecs (.ksyms, .kconfig) have their size and + * variable offsets set at the previous step. Further, not every + * extern BTF VAR has corresponding ELF symbol preserved, so we skip + * all fixups altogether for such sections and go straight to sorting + * VARs within their DATASEC. */ - if (t->size) + if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) goto sort_vars; - err = find_elf_sec_sz(obj, sec_name, &size); - if (err || !size) { - pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", - sec_name, size, err); - return -ENOENT; - } + /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to + * fix this up. But BPF static linker already fixes this up and fills + * all the sizes and offsets during static linking. So this step has + * to be optional. But the STV_HIDDEN handling is non-optional for any + * non-extern DATASEC, so the variable fixup loop below handles both + * functions at the same time, paying the cost of BTF VAR <-> ELF + * symbol matching just once. 
+ */ + if (t->size == 0) { + err = find_elf_sec_sz(obj, sec_name, &size); + if (err || !size) { + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", + sec_name, size, err); + return -ENOENT; + } - t->size = size; + t->size = size; + fixup_offsets = true; + } for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { const struct btf_type *t_var; @@ -2901,7 +2948,19 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, return -ENOENT; } - vsi->offset = sym->st_value; + if (fixup_offsets) + vsi->offset = sym->st_value; + + /* if variable is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR + * as static. This follows similar logic for functions (BPF + * subprogs) and influences libbpf's further decisions about + * whether to make global data BPF array maps as + * BPF_F_MMAPABLE. + */ + if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) + var->linkage = BTF_VAR_STATIC; } sort_vars: @@ -4223,7 +4282,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return 0; } -static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) +static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) { int id; -- cgit v1.2.3-58-ga151 From 2f968e9f4a953037f798802006ecd298c014b5b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Oct 2022 17:28:16 -0700 Subject: libbpf: add non-mmapable data section selftest Add non-mmapable data section to test_skeleton selftest and make sure it really isn't mmapable by trying to mmap() it anyways. Also make sure that libbpf doesn't report BPF_F_MMAPABLE flag to users. Additional, some more manual testing was performed that this feature works as intended. Looking at created map through bpftool shows that flags passed to kernel are indeed zero: $ bpftool map show ... 1782: array name .data.non_mmapa flags 0x0 key 4B value 16B max_entries 1 memlock 4096B btf_id 1169 pids test_progs(8311) ... Checking BTF uploaded to kernel for this map shows that zero_key and zero_value are indeed marked as static, even though zero_key is actually original global (but STV_HIDDEN) variable: $ bpftool btf dump id 1169 ... [51] VAR 'zero_key' type_id=2, linkage=static [52] VAR 'zero_value' type_id=7, linkage=static ... [62] DATASEC '.data.non_mmapable' size=16 vlen=2 type_id=51 offset=0 size=4 (VAR 'zero_key') type_id=52 offset=4 size=12 (VAR 'zero_value') ... And original BTF does have zero_key marked as linkage=global: $ bpftool btf dump file test_skeleton.bpf.linked3.o ... [51] VAR 'zero_key' type_id=2, linkage=global [52] VAR 'zero_value' type_id=7, linkage=static ... 
[62] DATASEC '.data.non_mmapable' size=16 vlen=2 type_id=51 offset=0 size=4 (VAR 'zero_key') type_id=52 offset=4 size=12 (VAR 'zero_value') Bpftool didn't require any changes at all because it checks whether internal map is mmapable already, but just to double-check generated skeleton, we see that .data.non_mmapable neither sets mmaped pointer nor has a corresponding field in the skeleton: $ grep non_mmapable test_skeleton.skel.h struct bpf_map *data_non_mmapable; s->maps[7].name = ".data.non_mmapable"; s->maps[7].map = &obj->maps.data_non_mmapable; But .data.read_mostly has all of those things: $ grep read_mostly test_skeleton.skel.h struct bpf_map *data_read_mostly; struct test_skeleton__data_read_mostly { int read_mostly_var; } *data_read_mostly; s->maps[6].name = ".data.read_mostly"; s->maps[6].map = &obj->maps.data_read_mostly; s->maps[6].mmaped = (void **)&obj->data_read_mostly; _Static_assert(sizeof(s->data_read_mostly->read_mostly_var) == 4, "unexpected size of 'read_mostly_var'"); Acked-by: Stanislav Fomichev Acked-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221019002816.359650-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/skeleton.c | 11 ++++++++++- tools/testing/selftests/bpf/progs/test_skeleton.c | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 99dac5292b41..bc6817aee9aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include +#include struct s { int a; @@ -22,7 +23,8 @@ void test_skeleton(void) struct test_skeleton__kconfig *kcfg; const void *elf_bytes; size_t elf_bytes_sz = 0; - int i; + void *m; + int i, fd; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -124,6 +126,13 @@ void test_skeleton(void) ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + fd = bpf_map__fd(skel->maps.data_non_mmapable); + m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success")) + munmap(m, getpagesize()); + + ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1a4e93f6d9df..adece9f91f58 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -53,6 +53,20 @@ int out_mostly_var; char huge_arr[16 * 1024 * 1024]; +/* non-mmapable custom .data section */ + +struct my_value { int x, y, z; }; + +__hidden int zero_key SEC(".data.non_mmapable"); +static struct my_value zero_value SEC(".data.non_mmapable"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct my_value); + __uint(max_entries, 1); +} my_map SEC(".maps"); + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -75,6 +89,9 @@ int handler(const void *ctx) huge_arr[sizeof(huge_arr) - 1] = 123; + /* make sure zero_key and zero_value are not optimized out */ + bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY); + return 0; } -- cgit v1.2.3-58-ga151 
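To make the user-visible difference concrete, here is a short sketch of how skeleton consumers end up accessing the two kinds of sections (member names taken from the generated test_skeleton.skel.h shown above):

    /* mmapable section: skeleton exposes a typed, memory-mapped view */
    skel->data_read_mostly->read_mostly_var = 42;

    /* non-mmapable section: only the struct bpf_map handle exists, so its
     * contents are reachable solely through map syscalls, e.g.: */
    int fd = bpf_map__fd(skel->maps.data_non_mmapable);

The selftest above checks exactly this: mmap() on the non-mmapable map's fd must fail, and bpf_map__map_flags() must report no BPF_F_MMAPABLE.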
From 8662de2321496499a21841486660817f72ac9456 Mon Sep 17 00:00:00 2001 From: Jie Meng Date: Fri, 7 Oct 2022 13:23:49 -0700 Subject: bpf: add selftests for lsh, rsh, arsh with reg operand Current tests cover only shifts with an immediate as the source operand/shift counts; add a new test case to cover register operand. Signed-off-by: Jie Meng Link: https://lore.kernel.org/r/20221007202348.1118830-4-jmeng@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/jit.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index 79021c30e51e..8bf37e5207f1 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -20,6 +20,30 @@ .result = ACCEPT, .retval = 2, }, +{ + "jit: lsh, rsh, arsh by reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_0), + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_1), + BPF_ALU32_REG(BPF_RSH, BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_4, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, { "jit: mov32 for ldimm64, 1", .insns = { -- cgit v1.2.3-58-ga151 From 98af3746026c3eccb51b43181c41e001fe73f7af Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Thu, 20 Oct 2022 11:05:34 +0800 Subject: selftests/bpf: fix missing BPF object files After commit afef88e65554 ("selftests/bpf: Store BPF object files with .bpf.o extension"), we should use *.bpf.o instead of *.o. In addition, use the BPF_FILE variable to save the BPF object file name, which can be better identified and modified. Fixes: afef88e65554 ("selftests/bpf: Store BPF object files with .bpf.o extension") Signed-off-by: Wang Yufen Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/1666235134-562-1-git-send-email-wangyufen@huawei.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_bpftool_metadata.sh | 7 +++++-- tools/testing/selftests/bpf/test_flow_dissector.sh | 6 ++++-- tools/testing/selftests/bpf/test_lwt_ip_encap.sh | 17 +++++++++-------- tools/testing/selftests/bpf/test_lwt_seg6local.sh | 9 +++++---- tools/testing/selftests/bpf/test_tc_edt.sh | 3 ++- tools/testing/selftests/bpf/test_tc_tunnel.sh | 5 +++-- tools/testing/selftests/bpf/test_tunnel.sh | 5 +++-- tools/testing/selftests/bpf/test_xdp_meta.sh | 9 +++++---- tools/testing/selftests/bpf/test_xdp_vlan.sh | 8 ++++---- 9 files changed, 40 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh index 1bf81b49457a..b5520692f41b 100755 --- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -4,6 +4,9 @@ # Kselftest framework requirement - SKIP code is 4. 
ksft_skip=4 +BPF_FILE_USED="metadata_used.bpf.o" +BPF_FILE_UNUSED="metadata_unused.bpf.o" + TESTNAME=bpftool_metadata BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) BPF_DIR=$BPF_FS/test_$TESTNAME @@ -55,7 +58,7 @@ mkdir $BPF_DIR trap cleanup EXIT -bpftool prog load metadata_unused.o $BPF_DIR/unused +bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null @@ -67,7 +70,7 @@ bpftool map | grep 'metadata.rodata' > /dev/null rm $BPF_DIR/unused -bpftool prog load metadata_used.o $BPF_DIR/used +bpftool prog load $BPF_FILE_USED $BPF_DIR/used METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 5303ce0c977b..4b298863797a 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # # Load BPF flow dissector and verify it correctly dissects traffic + +BPF_FILE="bpf_flow.bpf.o" export TESTNAME=test_flow_dissector unmount=0 @@ -22,7 +24,7 @@ if [[ -z $(ip netns identify $$) ]]; then if bpftool="$(which bpftool)"; then echo "Testing global flow dissector..." - $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \ type flow_dissector if ! unshare --net $bpftool prog attach pinned \ @@ -95,7 +97,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s _dissect +./flow_dissector_load -p $BPF_FILE -s _dissect # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 6c69c42b1d60..1e565f47aca9 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -38,6 +38,7 @@ # ping: SRC->[encap at veth2:ingress]->GRE:decap->DST # ping replies go DST->SRC directly +BPF_FILE="test_lwt_ip_encap.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -373,14 +374,14 @@ test_egress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} else echo " unknown encap ${ENCAP}" TEST_STATUS=1 @@ -431,14 +432,14 @@ test_ingress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj 
\ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} else echo "FAIL: unknown encap ${ENCAP}" TEST_STATUS=1 diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 826f4423ce02..0efea2292d6a 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -23,6 +23,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE="test_lwt_seg6local.bpf.o" readonly NS1="ns1-$(mktemp -u XXXXXX)" readonly NS2="ns2-$(mktemp -u XXXXXX)" readonly NS3="ns3-$(mktemp -u XXXXXX)" @@ -117,18 +118,18 @@ ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 -ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2 ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 +ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4 -ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6 ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 +ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8 ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index daa7d1b8d309..76f0bd17061f 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -5,6 +5,7 @@ # with dst port = 9000 down to 5MBps. Then it measures actual # throughput of the flow. 
+BPF_FILE="test_tc_edt.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -54,7 +55,7 @@ ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ - bpf da obj test_tc_edt.o sec cls_test + bpf da obj ${BPF_FILE} sec cls_test # start the listener diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 088fcad138c9..334bdfeab940 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -3,6 +3,7 @@ # # In-place tunneling +BPF_FILE="test_tc_tunnel.bpf.o" # must match the port that the bpf program filters on readonly port=8000 @@ -196,7 +197,7 @@ verify_data # client can no longer connect ip netns exec "${ns1}" tc qdisc add dev veth1 clsact ip netns exec "${ns1}" tc filter add dev veth1 egress \ - bpf direct-action object-file ./test_tc_tunnel.o \ + bpf direct-action object-file ${BPF_FILE} \ section "encap_${tuntype}_${mac}" echo "test bpf encap without decap (expect failure)" server_listen @@ -296,7 +297,7 @@ fi ip netns exec "${ns2}" ip link del dev testtun0 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact ip netns exec "${ns2}" tc filter add dev veth2 ingress \ - bpf direct-action object-file ./test_tc_tunnel.o section decap + bpf direct-action object-file ${BPF_FILE} section decap echo "test bpf encap with bpf decap" client_connect verify_data diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index e9ebc67d73f7..2eaedc1d9ed3 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -45,6 +45,7 @@ # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 6) Forward the packet to the overlay tnl dev +BPF_FILE="test_tunnel_kern.bpf.o" BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" PING_ARG="-c 3 -w 10 -q" ret=0 @@ -545,7 +546,7 @@ test_xfrm_tunnel() > /sys/kernel/debug/tracing/trace setup_xfrm_tunnel mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} tc qdisc add dev veth1 clsact tc filter add dev veth1 proto ip ingress bpf da object-pinned \ ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state @@ -572,7 +573,7 @@ attach_bpf() SET=$2 GET=$3 mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/ + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ tc qdisc add dev $DEV clsact tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index ea69370caae3..2740322c1878 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -1,5 +1,6 @@ #!/bin/sh +BPF_FILE="test_xdp_meta.bpf.o" # Kselftest framework requirement - SKIP code is 4. 
readonly KSFT_SKIP=4 readonly NS1="ns1-$(mktemp -u XXXXXX)" @@ -42,11 +43,11 @@ ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 ip netns exec ${NS1} tc qdisc add dev veth1 clsact ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x +ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x ip netns exec ${NS1} ip link set dev veth1 up ip netns exec ${NS2} ip link set dev veth2 up diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 810c407e0286..fbcaa9f0120b 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -200,11 +200,11 @@ ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First p # ---------------------------------------------------------------------- # In ns1: ingress use XDP to remove VLAN tags export DEVNS1=veth1 -export FILE=test_xdp_vlan.o +export BPF_FILE=test_xdp_vlan.bpf.o # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" export XDP_PROG=xdp_vlan_change -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # In ns1: egress use TC to add back VLAN tag 4011 # (del cmd) @@ -212,7 +212,7 @@ ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PRO # ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ - prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push # Now the namespaces can reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 @@ -226,7 +226,7 @@ ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 # export XDP_PROG=xdp_vlan_remove_outer2 ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # Now the namespaces should still be able reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 -- cgit v1.2.3-58-ga151 From 19526e701ea096d13ebf881d32511e567dc41557 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 14 Oct 2022 16:31:13 +0800 Subject: bpftool: Add autoattach for bpf prog load|loadall Add autoattach optional to support one-step load-attach-pin_link. 
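Conceptually, autoattach amounts to the following per-program libbpf sequence (a simplified sketch of the logic added in this patch, with error reporting trimmed):

    struct bpf_link *link = bpf_program__attach(prog);
    if (!link)  /* prog type without auto-attach support */
            return bpf_obj_pin(bpf_program__fd(prog), path);
    err = bpf_link__pin(link, path);  /* pin the link, not the program */
    bpf_link__destroy(link);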
For example, $ bpftool prog loadall test.o /sys/fs/bpf/test autoattach $ bpftool link 26: tracing name test1 tag f0da7d0058c00236 gpl loaded_at 2022-09-09T21:39:49+0800 uid 0 xlated 88B jited 55B memlock 4096B map_ids 3 btf_id 55 28: kprobe name test3 tag 002ef1bef0723833 gpl loaded_at 2022-09-09T21:39:49+0800 uid 0 xlated 88B jited 56B memlock 4096B map_ids 3 btf_id 55 57: tracepoint name oncpu tag 7aa55dfbdcb78941 gpl loaded_at 2022-09-09T21:41:32+0800 uid 0 xlated 456B jited 265B memlock 4096B map_ids 17,13,14,15 btf_id 82 $ bpftool link 1: tracing prog 26 prog_type tracing attach_type trace_fentry 3: perf_event prog 28 10: perf_event prog 57 The autoattach optional can support tracepoints, k(ret)probes, u(ret)probes. Signed-off-by: Wei Yongjun Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1665736275-28143-2-git-send-email-wangyufen@huawei.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index c81362a001ba..10ec29cb4598 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1453,6 +1453,67 @@ get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return ret; } +static int +auto_attach_program(struct bpf_program *prog, const char *path) +{ + struct bpf_link *link; + int err; + + link = bpf_program__attach(prog); + if (!link) { + p_info("Program %s does not support autoattach, falling back to pinning", + bpf_program__name(prog)); + return bpf_obj_pin(bpf_program__fd(prog), path); + } + + err = bpf_link__pin(link, path); + bpf_link__destroy(link); + return err; +} + +static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); + if (len < 0) + return -EINVAL; + if ((size_t)len >= buf_sz) + return -ENAMETOOLONG; + + return 0; +} + +static int +auto_attach_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + char buf[PATH_MAX]; + int err; + + bpf_object__for_each_program(prog, obj) { + err = pathname_concat(buf, sizeof(buf), path, bpf_program__name(prog)); + if (err) + goto err_unpin_programs; + + err = auto_attach_program(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_object__prev_program(obj, prog))) { + if (pathname_concat(buf, sizeof(buf), path, bpf_program__name(prog))) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; @@ -1464,6 +1525,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; const char *pinmaps = NULL; + bool auto_attach = false; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -1583,6 +1645,9 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_free_reuse_maps; pinmaps = GET_ARG(); + } else if (is_prefix(*argv, "autoattach")) { + auto_attach = true; + NEXT_ARG(); } else { p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); @@ -1692,14 +1757,20 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = bpf_obj_pin(bpf_program__fd(prog), pinfile); + if 
(auto_attach) + err = auto_attach_program(prog, pinfile); + else + err = bpf_obj_pin(bpf_program__fd(prog), pinfile); if (err) { p_err("failed to pin program %s", bpf_program__section_name(prog)); goto err_close_obj; } } else { - err = bpf_object__pin_programs(obj, pinfile); + if (auto_attach) + err = auto_attach_programs(obj, pinfile); + else + err = bpf_object__pin_programs(obj, pinfile); if (err) { p_err("failed to pin all programs"); goto err_close_obj; @@ -2338,6 +2409,7 @@ static int do_help(int argc, char **argv) " [type TYPE] [dev NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" + " [autoattach]\n" " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s run PROG \\\n" -- cgit v1.2.3-58-ga151 From ff0e9a579ec9f12cb4fec9bd94b15e3d613a1a77 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Fri, 14 Oct 2022 16:31:14 +0800 Subject: bpftool: Update doc (add autoattach to prog load) Add autoattach optional to prog load|loadall for supporting one-step load-attach-pin_link. Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1665736275-28143-3-git-send-email-wangyufen@huawei.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index eb1b2a254eb1..14de72544995 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -31,7 +31,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -131,7 +131,7 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs @@ -150,6 +150,17 @@ DESCRIPTION Optional **pinmaps** argument can be provided to pin all maps under *MAP_DIR* directory. + If **autoattach** is specified program will be attached + before pin. In that case, only the link (representing the + program attached to its hook) is pinned, not the program as + such, so the path won't show in **bpftool prog show -f**, + only show in **bpftool link show -f**. 
Also, this only works
+	  when bpftool (libbpf) is able to infer all necessary
+	  information from the object file, in particular, it's not
+	  supported for all program types. If a program does not
+	  support autoattach, bpftool falls back to regular pinning
+	  for that program instead.
+
	  Note: *PATH* must be located in *bpffs* mount. It must not
	  contain a dot character ('.'), which is reserved for future
	  extensions of *bpffs*.
-- cgit v1.2.3-58-ga151

From b81a677400755cf24c971d1342c4c53d81744d82 Mon Sep 17 00:00:00 2001
From: Wang Yufen
Date: Fri, 14 Oct 2022 16:31:15 +0800
Subject: bpftool: Update the bash completion (add autoattach to prog load)

Add an autoattach option to prog load|loadall to support one-step
load-attach-pin_link.

Signed-off-by: Wang Yufen
Link: https://lore.kernel.org/r/1665736275-28143-4-git-send-email-wangyufen@huawei.com
Signed-off-by: Alexei Starovoitov
---
 tools/bpf/bpftool/bash-completion/bpftool | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index dc1641e3670e..2957b42cab67 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -505,6 +505,7 @@ _bpftool()
                 _bpftool_once_attr 'type'
                 _bpftool_once_attr 'dev'
                 _bpftool_once_attr 'pinmaps'
+                _bpftool_once_attr 'autoattach'
                 return 0
                 ;;
         esac
-- cgit v1.2.3-58-ga151

From eb814cf1adea0ce24413c26c22e9f1a556a45d34 Mon Sep 17 00:00:00 2001
From: Delyan Kratunov
Date: Fri, 21 Oct 2022 19:36:38 +0000
Subject: selftests/bpf: fix task_local_storage/exit_creds rcu usage

BPF CI has revealed flakiness in the task_local_storage/exit_creds test.
The failure point in CI [1] is that null_ptr_count is equal to 0, which
indicates that the program hasn't run yet. This points to the
kern_sync_rcu (sys_membarrier -> synchronize_rcu underneath) not waiting
sufficiently.

Indeed, synchronize_rcu only waits for read-side sections that started
before the call. If the program execution starts *during* the
synchronize_rcu invocation (due to, say, preemption), the test won't
wait long enough.

As a speculative fix, call synchronize_rcu in a loop until an explicit
run counter has gone up.
[1]: https://github.com/kernel-patches/bpf/actions/runs/3268263235/jobs/5374940791 Signed-off-by: Delyan Kratunov Link: https://lore.kernel.org/r/156d4ef82275a074e8da8f4cffbd01b0c1466493.camel@meta.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/task_local_storage.c | 18 +++++++++++++++--- .../bpf/progs/task_local_storage_exit_creds.c | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 035c263aab1b..99a42a2b6e14 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -39,7 +39,8 @@ out: static void test_exit_creds(void) { struct task_local_storage_exit_creds *skel; - int err; + int err, run_count, sync_rcu_calls = 0; + const int MAX_SYNC_RCU_CALLS = 1000; skel = task_local_storage_exit_creds__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) @@ -53,8 +54,19 @@ static void test_exit_creds(void) if (CHECK_FAIL(system("ls > /dev/null"))) goto out; - /* sync rcu to make sure exit_creds() is called for "ls" */ - kern_sync_rcu(); + /* kern_sync_rcu is not enough on its own as the read section we want + * to wait for may start after we enter synchronize_rcu, so our call + * won't wait for the section to finish. Loop on the run counter + * as well to ensure the program has run. + */ + do { + kern_sync_rcu(); + run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST); + } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS); + + ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS, + "sync_rcu count too high"); + ASSERT_NEQ(run_count, 0, "run_count"); ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count"); ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count"); out: diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c index 81758c0aef99..41d88ed222ff 100644 --- a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c +++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c @@ -14,6 +14,7 @@ struct { __type(value, __u64); } task_storage SEC(".maps"); +int run_count = 0; int valid_ptr_count = 0; int null_ptr_count = 0; @@ -28,5 +29,7 @@ int BPF_PROG(trace_exit_creds, struct task_struct *task) __sync_fetch_and_add(&valid_ptr_count, 1); else __sync_fetch_and_add(&null_ptr_count, 1); + + __sync_fetch_and_add(&run_count, 1); return 0; } -- cgit v1.2.3-58-ga151 From d9740535b857650bd6211a67ac0c0d574cba1dce Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 18 Oct 2022 10:55:38 -0400 Subject: libbpf: Avoid allocating reg_name with sscanf in parse_usdt_arg() The reg_name in parse_usdt_arg() is used to hold register name, which is short enough to be held in a 16-byte array, so we could define reg_name as char reg_name[16] to avoid dynamically allocating reg_name with sscanf. 
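As a self-contained illustration of the %m-to-fixed-buffer change (the input string is a made-up x86-64 USDT argument spec, not taken from the patch):

    #include <stdio.h>

    int main(void)
    {
            char reg_name[16];
            int arg_sz, len;
            long off;

            /* "%15[^)]" stores at most 15 chars plus NUL into reg_name, so
             * no dynamic allocation (and no free()) is needed, unlike "%m[^)]" */
            if (sscanf("-4@-20(%rbp)", " %d @ %ld ( %%%15[^)] ) %n",
                       &arg_sz, &off, reg_name, &len) == 3)
                    printf("sz=%d off=%ld reg=%s\n", arg_sz, off, reg_name);
            return 0;
    }

This prints "sz=-4 off=-20 reg=rbp", matching what parse_usdt_arg() extracts for a memory-dereference argument.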
Suggested-by: Andrii Nakryiko Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221018145538.2046842-1-xukuohai@huaweicloud.com --- tools/lib/bpf/usdt.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 49f3c3b7f609..28fa1b2283de 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1225,26 +1225,24 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, ®_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", &arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, ®_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1456,16 +1454,15 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, ®_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", &arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -8@-88(s0) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1474,12 +1471,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec arg->arg_type = USDT_ARG_CONST; arg->val_off = off; arg->reg_off = 0; - } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, ®_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -8@a1 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; -- cgit v1.2.3-58-ga151 From 7e5eb725cf0a0a8f85e6b96e062bbd0d5d90c94e Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 20 Oct 2022 11:03:00 +0100 Subject: bpftool: Set binary name to "bpftool" in help and version output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commands "bpftool help" or "bpftool version" use argv[0] to display the name of the binary. While it is a convenient way to retrieve the string, it does not always produce the most readable output. For example, because of the way bpftool is currently packaged on Ubuntu (using a wrapper script), the command displays the absolute path for the binary: $ bpftool version | head -n 1 /usr/lib/linux-tools/5.15.0-50-generic/bpftool v5.15.60 More generally, there is no apparent reason for keeping the whole path and exact binary name in this output. 
If the user wants to understand what binary is being called, there are other ways to do so. This commit replaces argv[0] with "bpftool", to simply reflect what the tool is called. This is aligned on what "ip" or "tc" do, for example. As an additional benefit, this seems to help with integration with Meson for packaging [0]. [0] https://github.com/NixOS/nixpkgs/pull/195934 Suggested-by: Vladimír Čunát Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221020100300.69328-1-quentin@isovalent.com --- tools/bpf/bpftool/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index ccd7457f92bf..8bf3615f684f 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -450,7 +450,7 @@ int main(int argc, char **argv) json_output = false; show_pinned = false; block_mount = false; - bin_name = argv[0]; + bin_name = "bpftool"; opterr = 0; while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", -- cgit v1.2.3-58-ga151 From 2c76238eaddd7c519f6b5a7ed80f7da6f4e11373 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 20 Oct 2022 11:03:32 +0100 Subject: bpftool: Add "bootstrap" feature to version output Along with the version number, "bpftool version" displays a list of features that were selected at compilation time for bpftool. It would be useful to indicate in that list whether a binary is a bootstrap version of bpftool. Given that an increasing number of components rely on bootstrap versions for generating skeletons, this could help understand what a binary is capable of if it has been copied outside of the usual "bootstrap" directory. To detect a bootstrap version, we simply rely on the absence of implementation for the do_prog() function. To do this, we must move the (unchanged) list of commands before do_version(), which in turn requires renaming this "cmds" array to avoid shadowing it with the "cmds" argument in cmd_select(). Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221020100332.69563-1-quentin@isovalent.com --- tools/bpf/bpftool/main.c | 81 +++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 8bf3615f684f..b22223df4431 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -71,6 +71,27 @@ static int do_help(int argc, char **argv) return 0; } +static int do_batch(int argc, char **argv); +static int do_version(int argc, char **argv); + +static const struct cmd commands[] = { + { "help", do_help }, + { "batch", do_batch }, + { "prog", do_prog }, + { "map", do_map }, + { "link", do_link }, + { "cgroup", do_cgroup }, + { "perf", do_perf }, + { "net", do_net }, + { "feature", do_feature }, + { "btf", do_btf }, + { "gen", do_gen }, + { "struct_ops", do_struct_ops }, + { "iter", do_iter }, + { "version", do_version }, + { 0 } +}; + #ifndef BPFTOOL_VERSION /* bpftool's major and minor version numbers are aligned on libbpf's. There is * an offset of 6 for the version number, because bpftool's version was higher @@ -82,6 +103,15 @@ static int do_help(int argc, char **argv) #define BPFTOOL_PATCH_VERSION 0 #endif +static void +print_feature(const char *feature, bool state, unsigned int *nb_features) +{ + if (state) { + printf("%s %s", *nb_features ? 
"," : "", feature); + *nb_features = *nb_features + 1; + } +} + static int do_version(int argc, char **argv) { #ifdef HAVE_LIBBFD_SUPPORT @@ -94,6 +124,18 @@ static int do_version(int argc, char **argv) #else const bool has_skeletons = true; #endif + bool bootstrap = false; + int i; + + for (i = 0; commands[i].cmd; i++) { + if (!strcmp(commands[i].cmd, "prog")) { + /* Assume we run a bootstrap version if "bpftool prog" + * is not available. + */ + bootstrap = !commands[i].func; + break; + } + } if (json_output) { jsonw_start_object(json_wtr); /* root object */ @@ -114,6 +156,7 @@ static int do_version(int argc, char **argv) jsonw_bool_field(json_wtr, "libbfd", has_libbfd); jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); + jsonw_bool_field(json_wtr, "bootstrap", bootstrap); jsonw_end_object(json_wtr); /* features */ jsonw_end_object(json_wtr); /* root object */ @@ -128,16 +171,10 @@ static int do_version(int argc, char **argv) #endif printf("using libbpf %s\n", libbpf_version_string()); printf("features:"); - if (has_libbfd) { - printf(" libbfd"); - nb_features++; - } - if (!legacy_libbpf) { - printf("%s libbpf_strict", nb_features++ ? "," : ""); - nb_features++; - } - if (has_skeletons) - printf("%s skeletons", nb_features++ ? "," : ""); + print_feature("libbfd", has_libbfd, &nb_features); + print_feature("libbpf_strict", !legacy_libbpf, &nb_features); + print_feature("skeletons", has_skeletons, &nb_features); + print_feature("bootstrap", bootstrap, &nb_features); printf("\n"); } return 0; @@ -279,26 +316,6 @@ static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb) return n_argc; } -static int do_batch(int argc, char **argv); - -static const struct cmd cmds[] = { - { "help", do_help }, - { "batch", do_batch }, - { "prog", do_prog }, - { "map", do_map }, - { "link", do_link }, - { "cgroup", do_cgroup }, - { "perf", do_perf }, - { "net", do_net }, - { "feature", do_feature }, - { "btf", do_btf }, - { "gen", do_gen }, - { "struct_ops", do_struct_ops }, - { "iter", do_iter }, - { "version", do_version }, - { 0 } -}; - static int do_batch(int argc, char **argv) { char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX]; @@ -386,7 +403,7 @@ static int do_batch(int argc, char **argv) jsonw_name(json_wtr, "output"); } - err = cmd_select(cmds, n_argc, n_argv, do_help); + err = cmd_select(commands, n_argc, n_argv, do_help); if (json_output) jsonw_end_object(json_wtr); @@ -528,7 +545,7 @@ int main(int argc, char **argv) if (version_requested) return do_version(argc, argv); - ret = cmd_select(cmds, argc, argv, do_help); + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); -- cgit v1.2.3-58-ga151 From 7a42af4b94f109f19b563fa7930715a3f298bf1b Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 21 Oct 2022 14:06:58 -0700 Subject: selftests/bpf: Remove entries from config.s390x already present in config `config.s390x` had entries already present in `config`. When generating the config used by vmtest, we concatenate the `config` file with the `config.{arch}` one, making those entries duplicated. This patch removes that duplication. 
Before: $ comm -1 -2 <(sort tools/testing/selftests/bpf/config.s390x) <(sort tools/testing/selftests/bpf/config) CONFIG_MODULE_SIG=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y $ Ater: $ comm -1 -2 <(sort tools/testing/selftests/bpf/config.s390x) <(sort tools/testing/selftests/bpf/config) $ Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221021210701.728135-2-chantr4@gmail.com --- tools/testing/selftests/bpf/config.s390x | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index f8a7a258a718..d49f6170e7bd 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -82,9 +82,6 @@ CONFIG_MARCH_Z196_TUNE=y CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULES=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_9P=y -- cgit v1.2.3-58-ga151 From ec99451f0a488e50aaf0ce467db8771411edc407 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 21 Oct 2022 14:06:59 -0700 Subject: selftests/bpf: Add config.aarch64 config.aarch64, similarly to config.{s390x,x86_64} is a config enabling building a kernel on aarch64 to be used in bpf's selftests/kernel-patches CI. Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221021210701.728135-3-chantr4@gmail.com --- tools/testing/selftests/bpf/config.aarch64 | 181 +++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 tools/testing/selftests/bpf/config.aarch64 (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 new file mode 100644 index 000000000000..1f0437644186 --- /dev/null +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -0,0 +1,181 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_ARM_SMMU_V3=y +CONFIG_ATA=y +CONFIG_AUDIT=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_SD=y +CONFIG_BONDING=y +CONFIG_BPFILTER=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_BRIDGE=m +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CHR_DEV_SG=y +CONFIG_COMPAT=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM=y +CONFIG_DUMMY=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FANOTIFY=y +CONFIG_FB=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_FUSE_FS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM_VIRTIO=y 
+CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET_ESP=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MAILBOX=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_NAMESPACES=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NF_TABLES=y +CONFIG_NLMON=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_OVERLAY_FS=y +CONFIG_PACKET_DIAG=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_PL320_MBOX=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SCSI=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TCG_TIS=y +CONFIG_TCG_TPM=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_XFRM_USER=y -- cgit v1.2.3-58-ga151 From 20776b72ae2a43311f82f48f7b78f484cc89e463 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 21 Oct 2022 14:07:00 -0700 Subject: selftests/bpf: Update vmtests.sh to support aarch64 Add handling of aarch64 when setting QEMU options and provide the right path to aarch64 kernel image. 
Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221021210701.728135-4-chantr4@gmail.com --- tools/testing/selftests/bpf/vmtest.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index a29aa05ebb3e..316a56d680f2 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -21,6 +21,12 @@ x86_64) QEMU_FLAGS=(-cpu host -smp 8) BZIMAGE="arch/x86/boot/bzImage" ;; +aarch64) + QEMU_BINARY=qemu-system-aarch64 + QEMU_CONSOLE="ttyAMA0,115200" + QEMU_FLAGS=(-M virt,gic-version=3 -cpu host -smp 8) + BZIMAGE="arch/arm64/boot/Image" + ;; *) echo "Unsupported architecture" exit 1 -- cgit v1.2.3-58-ga151 From 94d52a19180726ee8ddc70bea75d6605e1dd6029 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Fri, 21 Oct 2022 14:07:01 -0700 Subject: selftests/bpf: Initial DENYLIST for aarch64 Those tests are currently failing on aarch64, ignore them until they are individually addressed. Using this deny list, vmtest.sh ran successfully using LLVM_STRIP=llvm-strip-16 CLANG=clang-16 \ tools/testing/selftests/bpf/vmtest.sh -- \ ./test_progs -d \ \"$(cat tools/testing/selftests/bpf/DENYLIST{,.aarch64} \ | cut -d'#' -f1 \ | sed -e 's/^[[:space:]]*//' \ -e 's/[[:space:]]*$//' \ | tr -s '\n' ','\ )\" Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221021210701.728135-5-chantr4@gmail.com --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 81 ++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tools/testing/selftests/bpf/DENYLIST.aarch64 (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 new file mode 100644 index 000000000000..09416d5d2e33 --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -0,0 +1,81 @@ +bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 +bpf_cookie/lsm +bpf_cookie/multi_kprobe_attach_api +bpf_cookie/multi_kprobe_link_api +bpf_cookie/trampoline +bpf_loop/check_callback_fn_stop # link unexpected error: -524 +bpf_loop/check_invalid_flags +bpf_loop/check_nested_calls +bpf_loop/check_non_constant_callback +bpf_loop/check_nr_loops +bpf_loop/check_null_callback_ctx +bpf_loop/check_stack +bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) +bpf_tcp_ca/dctcp_fallback +btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 +cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) +d_path/basic # setup attach failed: -524 +deny_namespace # attach unexpected error: -524 (errno 524) +fentry_fexit # fentry_attach unexpected error: -1 (errno 524) +fentry_test # fentry_attach unexpected error: -1 (errno 524) +fexit_sleep # fexit_attach fexit attach failed: -1 +fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 +fexit_test # fexit_attach unexpected error: -1 (errno 524) +get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) +htab_update/reenter_update +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF +kfunc_call/subprog_lskel # skel unexpected error: -2 +kfunc_dynptr_param/dynptr_data_null # libbpf: 
prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 +kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) +ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF +ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 +libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) +lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) +lru_bug # lru_bug__attach unexpected error: -524 (errno 524) +modify_return # modify_return__attach failed unexpected error: -524 (errno 524) +module_attach # skel_attach skeleton attach failed: -524 +mptcp/base # run_test mptcp unexpected error: -524 (errno 524) +netcnt # packets unexpected packets: actual 10001 != expected 10000 +recursion # skel_attach unexpected error: -524 (errno 524) +ringbuf # skel_attach skeleton attachment failed: -1 +setget_sockopt # attach_cgroup unexpected error: -524 +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) +socket_cookie # prog_attach unexpected error: -524 +stacktrace_build_id # compare_stack_ips stackmap vs. 
stack_amap err -1 errno 2 +task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) +task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) +test_bprm_opts # attach attach failed: -524 +test_ima # attach attach failed: -524 +test_local_storage # attach lsm attach failed: -524 +test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) +test_overhead # attach_fentry unexpected error: -524 +timer # timer unexpected error: -524 (errno 524) +timer_crash # timer_crash__attach unexpected error: -524 (errno 524) +timer_mim # timer_mim unexpected error: -524 (errno 524) +trace_printk # trace_printk__attach unexpected error: -1 (errno 524) +trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) +tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) +trampoline_count # attach_prog unexpected error: -524 +unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_producer_wrong_offset +user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz +user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_sample_full_ring_buffer +user_ringbuf/test_user_ringbuf_post_alignment_autoadjust +user_ringbuf/test_user_ringbuf_overfill +user_ringbuf/test_user_ringbuf_discards_properly_ignored +user_ringbuf/test_user_ringbuf_loop +user_ringbuf/test_user_ringbuf_msg_protocol +user_ringbuf/test_user_ringbuf_blocking_reserve +verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) +vmlinux # skel_attach skeleton attach failed: -524 -- cgit v1.2.3-58-ga151 From 51ee71d38d8ce5a30496251200ab2f12b71e1fb6 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 20 Oct 2022 09:07:20 -0700 Subject: selftests/bpf: Add test verifying bpf_ringbuf_reserve retval use in map ops Add a test_ringbuf_map_key test prog, borrowing heavily from extant test_ringbuf.c. The program tries to use the result of bpf_ringbuf_reserve as map_key, which was not possible before previous commits in this series. The test runner added to prog_tests/ringbuf.c verifies that the program loads and does basic sanity checks to confirm that it runs as expected. Also, refactor test_ringbuf such that runners for existing test_ringbuf and newly-added test_ringbuf_map_key are subtests of 'ringbuf' top-level test.
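For reference, the core pattern the new program exercises boils down to the following sketch, distilled from the full listing in the diff below (the ringbuf and hash_map definitions are as shown there):

	sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
	if (!sample)
		return 0;
	/* 'sample' is PTR_TO_MEM | MEM_ALLOC here; the previous commits
	 * in this series allow passing it where a map key is expected.
	 */
	lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample);
	bpf_ringbuf_submit(sample, 0);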
Signed-off-by: Dave Marchevsky Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221020160721.4030492-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 8 ++- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 66 +++++++++++++++++++- .../selftests/bpf/progs/test_ringbuf_map_key.c | 70 ++++++++++++++++++++++ 3 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e6cf21fad69f..79edef1dbda4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -359,9 +359,11 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c \ - test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ - map_ptr_kern.c core_kern.c core_kern_overflow.c +LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ + trace_printk.c trace_vprintk.c map_ptr_kern.c \ + core_kern.c core_kern_overflow.c test_ringbuf.c \ + test_ringbuf_map_key.c + # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ kfunc_call_test_subprog.c diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 9a80fe8a6427..ac104dc652e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -13,6 +13,7 @@ #include #include #include "test_ringbuf.lskel.h" +#include "test_ringbuf_map_key.lskel.h" #define EDONE 7777 @@ -58,6 +59,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } +static struct test_ringbuf_map_key_lskel *skel_map_key; static struct test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; @@ -81,7 +83,7 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } -void test_ringbuf(void) +static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; @@ -297,3 +299,65 @@ cleanup: ring_buffer__free(ringbuf); test_ringbuf_lskel__destroy(skel); } + +static int process_map_key_sample(void *ctx, void *data, size_t len) +{ + struct sample *s; + int err, val; + + s = data; + switch (s->seq) { + case 1: + ASSERT_EQ(s->value, 42, "sample_value"); + err = bpf_map_lookup_elem(skel_map_key->maps.hash_map.map_fd, + s, &val); + ASSERT_OK(err, "hash_map bpf_map_lookup_elem"); + ASSERT_EQ(val, 1, "hash_map val"); + return -EDONE; + default: + return 0; + } +} + +static void ringbuf_map_key_subtest(void) +{ + int err; + + skel_map_key = test_ringbuf_map_key_lskel__open(); + if (!ASSERT_OK_PTR(skel_map_key, "test_ringbuf_map_key_lskel__open")) + return; + + skel_map_key->maps.ringbuf.max_entries = getpagesize(); + skel_map_key->bss->pid = getpid(); + + err = test_ringbuf_map_key_lskel__load(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__load")) + goto cleanup; + + ringbuf = ring_buffer__new(skel_map_key->maps.ringbuf.map_fd, + process_map_key_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + err = test_ringbuf_map_key_lskel__attach(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__attach")) + goto cleanup_ringbuf; + + 
syscall(__NR_getpgid); + ASSERT_EQ(skel_map_key->bss->seq, 1, "skel_map_key->bss->seq"); + err = ring_buffer__poll(ringbuf, -1); + ASSERT_EQ(err, -EDONE, "ring_buffer__poll"); + +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_map_key_lskel__destroy(skel_map_key); +} + +void test_ringbuf(void) +{ + if (test__start_subtest("ringbuf")) + ringbuf_subtest(); + if (test__start_subtest("ringbuf_map_key")) + ringbuf_map_key_subtest(); +} diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c new file mode 100644 index 000000000000..2760bf60d05a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, struct sample); + __type(value, int); +} hash_map SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* inner state */ +long seq = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_mem_map_key(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample, sample_copy; + int *lookup_val; + + if (cur_pid != pid) + return 0; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); + if (!sample) + return 0; + + sample->pid = pid; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + sample->seq = ++seq; + sample->value = 42; + + /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg + */ + lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); + + /* workaround - memcpy is necessary so that verifier doesn't + * complain with: + * verifier internal error: more than one arg with ref_obj_id R3 + * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY); + * + * Since bpf_map_lookup_elem above uses 'sample' as key, test using + * sample field as value below + */ + __builtin_memcpy(&sample_copy, sample, sizeof(struct sample)); + bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY); + + bpf_ringbuf_submit(sample, 0); + return 0; +} -- cgit v1.2.3-58-ga151 From 8f4bc15b9ad73434643aadb19506e1547bedf7eb Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 20 Oct 2022 09:07:21 -0700 Subject: selftests/bpf: Add write to hashmap to array_map iter test Modify iter prog in existing bpf_iter_bpf_array_map.c, which currently dumps arraymap key/val, to also do a write of (val, key) into a newly-added hashmap. Confirm that the write succeeds as expected by modifying the userspace runner program. Before a change added in an earlier commit - considering PTR_TO_BUF reg a valid input to helpers which expect MAP_{KEY,VAL} - the verifier would've rejected this prog change due to type mismatch. Since using current iter's key/val to access a separate map is a reasonable usecase, let's add support for it. Note that the test prog cannot directly write (val, key) into hashmap via bpf_map_update_elem when both come from iter context because key is marked MEM_RDONLY. This is due to bpf_map_update_elem - and other basic map helpers - taking ARG_PTR_TO_MAP_{KEY,VALUE} w/o MEM_RDONLY type flag. 
bpf_map_{lookup,update,delete}_elem don't modify their input key/val so it should be possible to tag their args READONLY, but due to the ubiquitous use of these helpers and verifier checks for type == MAP_VALUE, such a change is nontrivial and seems better to address in a followup series. Also fixup some 'goto's in test runner's map checking loop. Signed-off-by: Dave Marchevsky Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221020160721.4030492-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 20 ++++++++++++++------ .../selftests/bpf/progs/bpf_iter_bpf_array_map.c | 21 ++++++++++++++++++++- 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index c39d40f4b268..6f8ed61fc4b4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -941,10 +941,10 @@ static void test_bpf_array_map(void) { __u64 val, expected_val = 0, res_first_val, first_val = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); - __u32 expected_key = 0, res_first_key; + __u32 key, expected_key = 0, res_first_key; + int err, i, map_fd, hash_fd, iter_fd; struct bpf_iter_bpf_array_map *skel; union bpf_iter_link_info linfo; - int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; int len, start; @@ -1001,12 +1001,20 @@ static void test_bpf_array_map(void) if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; + hash_fd = bpf_map__fd(skel->maps.hashmap1); for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { err = bpf_map_lookup_elem(map_fd, &i, &val); - if (!ASSERT_OK(err, "map_lookup")) - goto out; - if (!ASSERT_EQ(i, val, "invalid_val")) - goto out; + if (!ASSERT_OK(err, "map_lookup arraymap1")) + goto close_iter; + if (!ASSERT_EQ(i, val, "invalid_val arraymap1")) + goto close_iter; + + val = i + 4; + err = bpf_map_lookup_elem(hash_fd, &val, &key); + if (!ASSERT_OK(err, "map_lookup hashmap1")) + goto close_iter; + if (!ASSERT_EQ(key, val - 4, "invalid_val hashmap1")) + goto close_iter; } close_iter: diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index 6286023fd62b..c5969ca6f26b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -19,13 +19,20 @@ struct { __type(value, __u64); } arraymap1 SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, __u64); + __type(value, __u32); +} hashmap1 SEC(".maps"); + __u32 key_sum = 0; __u64 val_sum = 0; SEC("iter/bpf_map_elem") int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) { - __u32 *key = ctx->key; + __u32 *hmap_val, *key = ctx->key; __u64 *val = ctx->value; if (key == (void *)0 || val == (void *)0) @@ -35,6 +42,18 @@ int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); key_sum += *key; val_sum += *val; + + /* workaround - It's necessary to do this convoluted (val, key) + * write into hashmap1, instead of simply doing + * bpf_map_update_elem(&hashmap1, val, key, BPF_ANY); + * because key has MEM_RDONLY flag and bpf_map_update elem expects + * types without this flag + */ + bpf_map_update_elem(&hashmap1, val, val, BPF_ANY); + hmap_val = bpf_map_lookup_elem(&hashmap1, val); + if 
(hmap_val) + *hmap_val = *key; + *val = *key; return 0; } -- cgit v1.2.3-58-ga151 From 6df96146b2025e122447354daf66edbfa88e8a1e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 21 Oct 2022 13:44:35 -0700 Subject: selftest: Add test for SO_INCOMING_CPU. Some highly optimised applications use SO_INCOMING_CPU to make them efficient, but they didn't test if it's working correctly by getsockopt() to avoid slowing down. As a result, no one noticed it had been broken for years, so it's a good time to add a test to catch future regression. The test does 1) Create $(nproc) TCP listeners associated with each CPU. 2) Create 32 child sockets for each listener by calling sched_setaffinity() for each CPU. 3) Check if accept()ed sockets' sk_incoming_cpu matches listener's one. If we see -EAGAIN, SO_INCOMING_CPU is broken. However, we might not see any error even if broken; the kernel could miraculously distribute all SYN to correct listeners. Not to let that happen, we must increase the number of clients and CPUs to some extent, so the test requires $(nproc) >= 2 and creates 64 sockets at least. Test: $ nproc 96 $ ./so_incoming_cpu Before the previous patch: # Starting 12 tests from 5 test cases. # RUN so_incoming_cpu.before_reuseport.test1 ... # so_incoming_cpu.c:191:test1:Expected cpu (5) == i (0) # test1: Test terminated by assertion # FAIL so_incoming_cpu.before_reuseport.test1 not ok 1 so_incoming_cpu.before_reuseport.test1 ... # FAILED: 0 / 12 tests passed. # Totals: pass:0 fail:12 xfail:0 xpass:0 skip:0 error:0 After: # Starting 12 tests from 5 test cases. # RUN so_incoming_cpu.before_reuseport.test1 ... # so_incoming_cpu.c:199:test1:SO_INCOMING_CPU is very likely to be working correctly with 3072 sockets. # OK so_incoming_cpu.before_reuseport.test1 ok 1 so_incoming_cpu.before_reuseport.test1 ... # PASSED: 12 / 12 tests passed. # Totals: pass:12 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/so_incoming_cpu.c | 242 ++++++++++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 tools/testing/selftests/net/so_incoming_cpu.c (limited to 'tools') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 3d7adee7a3e6..ff8807cc9c2e 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -25,6 +25,7 @@ rxtimestamp sk_bind_sendto_listen sk_connect_zero_addr socket +so_incoming_cpu so_netns_cookie so_txtime stress_reuseport_listen diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 69c58362c0ed..cec4800cb017 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -71,6 +71,7 @@ TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_PROGS += test_ingress_egress_chaining.sh +TEST_GEN_PROGS += so_incoming_cpu TEST_FILES := settings diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c new file mode 100644 index 000000000000..0e04f9fef986 --- /dev/null +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. 
*/ +#define _GNU_SOURCE +#include + +#include +#include +#include + +#include "../kselftest_harness.h" + +#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */ +#define NR_SERVER self->nproc +#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER) + +FIXTURE(so_incoming_cpu) +{ + int nproc; + int *servers; + union { + struct sockaddr addr; + struct sockaddr_in in_addr; + }; + socklen_t addrlen; +}; + +enum when_to_set { + BEFORE_REUSEPORT, + BEFORE_LISTEN, + AFTER_LISTEN, + AFTER_ALL_LISTEN, +}; + +FIXTURE_VARIANT(so_incoming_cpu) +{ + int when_to_set; +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport) +{ + .when_to_set = BEFORE_REUSEPORT, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen) +{ + .when_to_set = BEFORE_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen) +{ + .when_to_set = AFTER_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen) +{ + .when_to_set = AFTER_ALL_LISTEN, +}; + +FIXTURE_SETUP(so_incoming_cpu) +{ + self->nproc = get_nprocs(); + ASSERT_LE(2, self->nproc); + + self->servers = malloc(sizeof(int) * NR_SERVER); + ASSERT_NE(self->servers, NULL); + + self->in_addr.sin_family = AF_INET; + self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + self->in_addr.sin_port = htons(0); + self->addrlen = sizeof(struct sockaddr_in); +} + +FIXTURE_TEARDOWN(so_incoming_cpu) +{ + int i; + + for (i = 0; i < NR_SERVER; i++) + close(self->servers[i]); + + free(self->servers); +} + +void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu) +{ + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int)); + ASSERT_EQ(ret, 0); +} + +int create_server(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant, + int cpu) +{ + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); + ASSERT_NE(fd, -1); + + if (variant->when_to_set == BEFORE_REUSEPORT) + set_so_incoming_cpu(_metadata, fd, cpu); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + + ret = bind(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == BEFORE_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + /* We don't use CLIENT_PER_SERVER here not to block + * this test at connect() if SO_INCOMING_CPU is broken. + */ + ret = listen(fd, NR_CLIENT); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == AFTER_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + return fd; +} + +void create_servers(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant) +{ + int i, ret; + + for (i = 0; i < NR_SERVER; i++) { + self->servers[i] = create_server(_metadata, self, variant, i); + + if (i == 0) { + ret = getsockname(self->servers[i], &self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + } + } + + if (variant->when_to_set == AFTER_ALL_LISTEN) { + for (i = 0; i < NR_SERVER; i++) + set_so_incoming_cpu(_metadata, self->servers[i], i); + } +} + +void create_clients(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + cpu_set_t cpu_set; + int i, j, fd, ret; + + for (i = 0; i < NR_SERVER; i++) { + CPU_ZERO(&cpu_set); + + CPU_SET(i, &cpu_set); + ASSERT_EQ(CPU_COUNT(&cpu_set), 1); + ASSERT_NE(CPU_ISSET(i, &cpu_set), 0); + + /* Make sure SYN will be processed on the i-th CPU + * and finally distributed to the i-th listener. 
+ */ + sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + ASSERT_EQ(ret, 0); + + for (j = 0; j < CLIENT_PER_SERVER; j++) { + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(fd, -1); + + ret = connect(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + close(fd); + } + } +} + +void verify_incoming_cpu(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + int i, j, fd, cpu, ret, total = 0; + socklen_t len = sizeof(int); + + for (i = 0; i < NR_SERVER; i++) { + for (j = 0; j < CLIENT_PER_SERVER; j++) { + /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ + fd = accept(self->servers[i], &self->addr, &self->addrlen); + ASSERT_NE(fd, -1); + + ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len); + ASSERT_EQ(ret, 0); + ASSERT_EQ(cpu, i); + + close(fd); + total++; + } + } + + ASSERT_EQ(total, NR_CLIENT); + TH_LOG("SO_INCOMING_CPU is very likely to be " + "working correctly with %d sockets.", total); +} + +TEST_F(so_incoming_cpu, test1) +{ + create_servers(_metadata, self, variant); + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test2) +{ + int server; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + close(server); + + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test3) +{ + int server, client; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + + create_clients(_metadata, self); + + /* Never receive any requests */ + client = accept(server, &self->addr, &self->addrlen); + ASSERT_EQ(client, -1); + + verify_incoming_cpu(_metadata, self); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3-58-ga151 From b3d84af7cdfc079ef86d94f7cf125821559925fa Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:22 +0100 Subject: bpftool: Define _GNU_SOURCE only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _GNU_SOURCE is defined in several source files for bpftool, but only one of them takes the precaution of checking whether the value is already defined. Add #ifndef for other occurrences too. This is in preparation for the support of disassembling JIT-ed programs with LLVM, with $(llvm-config --cflags) passing -D_GNU_SOURCE as a compilation argument. Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Song Liu Link: https://lore.kernel.org/r/20221025150329.97371-2-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/common.c | 2 ++ tools/bpf/bpftool/iter.c | 2 ++ tools/bpf/bpftool/jit_disasm.c | 2 ++ tools/bpf/bpftool/net.c | 2 ++ tools/bpf/bpftool/perf.c | 2 ++ tools/bpf/bpftool/prog.c | 2 ++ tools/bpf/bpftool/xlated_dumper.c | 2 ++ 7 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 8727765add88..4c2e909a2d67 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. 
*/ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index f88fdc820d23..a3e6b167153d 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2020 Facebook +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index aaf99a0168c9..71cb258ab0ee 100644 @@ -11,7 +11,9 @@ * Licensed under the GNU General Public License, version 2.0 (GPLv2) */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 526a332c48e6..c40e44c938ae 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 226ec2c39052..91743445e4c7 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -2,7 +2,9 @@ // Copyright (C) 2018 Facebook // Author: Yonghong Song +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 10ec29cb4598..dbf5489b8fde 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 2d9cd6a7b3c8..6fe3134ae45d 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include -- cgit v1.2.3-58-ga151 From 55b4de58d0e2aca810ed2b198a0173640300acf8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:23 +0100 Subject: bpftool: Remove asserts from JIT disassembler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The JIT disassembler in bpftool is the only component (with the JSON writer) using asserts to check the return values of functions. But it does not do so in a consistent way, and disasm_print_insn() returns no value, although the operation sometimes fails. Remove the asserts, and instead check the return values, print messages on errors, and propagate the error to the caller from prog.c. Remove the inclusion of assert.h from jit_disasm.c, and also from map.c where it is unused.
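The shape of the conversion, sketched on get_exec_path() (illustrative only; the complete change is in the diff below):

	/* Before: abort the whole tool on failure. */
	len = readlink(path, tpath, size - 1);
	assert(len > 0);

	/* After: report the error and let the caller propagate it. */
	len = readlink(path, tpath, size - 1);
	if (len <= 0)
		return -1;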
Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Song Liu Link: https://lore.kernel.org/r/20221025150329.97371-3-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/jit_disasm.c | 51 +++++++++++++++++++++++++++++------------- tools/bpf/bpftool/main.h | 25 +++++++++++---------- tools/bpf/bpftool/map.c | 1 - tools/bpf/bpftool/prog.c | 15 ++++++++----- 4 files changed, 57 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 71cb258ab0ee..723a9b799a0c 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -31,14 +30,18 @@ #include "json_writer.h" #include "main.h" -static void get_exec_path(char *tpath, size_t size) +static int get_exec_path(char *tpath, size_t size) { const char *path = "/proc/self/exe"; ssize_t len; len = readlink(path, tpath, size - 1); - assert(len > 0); + if (len <= 0) + return -1; + tpath[len] = 0; + + return 0; } static int oper_count; @@ -99,30 +102,39 @@ static int fprintf_json_styled(void *out, return r; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { const struct bpf_line_info *linfo = NULL; disassembler_ftype disassemble; + int count, i, pc = 0, err = -1; struct disassemble_info info; unsigned int nr_skip = 0; - int count, i, pc = 0; char tpath[PATH_MAX]; bfd *bfdf; if (!len) - return; + return -1; memset(tpath, 0, sizeof(tpath)); - get_exec_path(tpath, sizeof(tpath)); + if (get_exec_path(tpath, sizeof(tpath))) { + p_err("failed to create disasembler (get_exec_path)"); + return -1; + } bfdf = bfd_openr(tpath, NULL); - assert(bfdf); - assert(bfd_check_format(bfdf, bfd_object)); + if (!bfdf) { + p_err("failed to create disassembler (bfd_openr)"); + return -1; + } + if (!bfd_check_format(bfdf, bfd_object)) { + p_err("failed to create disassembler (bfd_check_format)"); + goto exit_close; + } if (json_output) init_disassemble_info_compat(&info, stdout, @@ -141,7 +153,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfdf->arch_info = inf; } else { p_err("No libbfd support for %s", arch); - return; + goto exit_close; } } @@ -162,7 +174,10 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, #else disassemble = disassembler(bfdf); #endif - assert(disassemble); + if (!disassemble) { + p_err("failed to create disassembler"); + goto exit_close; + } if (json_output) jsonw_start_array(json_wtr); @@ -226,7 +241,11 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_end_array(json_wtr); + err = 0; + +exit_close: bfd_close(bfdf); + return err; } int disasm_init(void) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5e5060c2ac04..c9e171082cf6 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -173,22 +173,23 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; #ifdef HAVE_LIBBFD_SUPPORT -void 
disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum); +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum); int disasm_init(void); #else static inline -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { + return 0; } static inline int disasm_init(void) { diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 9a6ca9f31133..3087ced658ad 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ -#include #include #include #include diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index dbf5489b8fde..93dfb89b85e3 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -822,10 +822,11 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum); + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; img += lens[i]; @@ -838,8 +839,10 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, member_len, opcodes, name, - disasm_opt, btf, NULL, 0, 0, false); + if (disasm_print_insn(buf, member_len, opcodes, name, + disasm_opt, btf, NULL, 0, 0, + false)) + goto exit_free; } } else if (visual) { if (json_output) -- cgit v1.2.3-58-ga151 From 108326d6fa6c04e1473886fd662981792fc708a2 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:24 +0100 Subject: bpftool: Split FEATURE_TESTS/FEATURE_DISPLAY definitions in Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make FEATURE_TESTS and FEATURE_DISPLAY easier to read and less likely to be subject to conflicts on updates by having one feature per line. 
Suggested-by: Andres Freund Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Song Liu Link: https://lore.kernel.org/r/20221025150329.97371-4-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4a95c017ad4c..0218d6a1cae7 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -93,11 +93,20 @@ INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd libbfd-liberty libbfd-liberty-z \ - disassembler-four-args disassembler-init-styled libcap \ - clang-bpf-co-re -FEATURE_DISPLAY = libbfd libbfd-liberty libbfd-liberty-z \ - libcap clang-bpf-co-re + +FEATURE_TESTS := clang-bpf-co-re +FEATURE_TESTS += libcap +FEATURE_TESTS += libbfd +FEATURE_TESTS += libbfd-liberty +FEATURE_TESTS += libbfd-liberty-z +FEATURE_TESTS += disassembler-four-args +FEATURE_TESTS += disassembler-init-styled + +FEATURE_DISPLAY := clang-bpf-co-re +FEATURE_DISPLAY += libcap +FEATURE_DISPLAY += libbfd +FEATURE_DISPLAY += libbfd-liberty +FEATURE_DISPLAY += libbfd-liberty-z check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall -- cgit v1.2.3-58-ga151 From 2ea4d86a5093defcb2fc49799184ede178e64d36 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:25 +0100 Subject: bpftool: Group libbfd defs in Makefile, only pass them if we use libbfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bpftool uses libbfd for disassembling JIT-ed programs. But the feature is optional, and the tool can be compiled without libbfd support. The Makefile sets the relevant variables accordingly. It also sets variables related to libbfd's interface, given that it has changed over time. Group all those libbfd-related definitions so that it's easier to understand what we are testing for, and only use variables related to libbfd's interface if we need libbfd in the first place. In addition to make the Makefile clearer, grouping the definitions related to disassembling JIT-ed programs will help support alternatives to libbfd. 
Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Song Liu Link: https://lore.kernel.org/r/20221025150329.97371-5-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 0218d6a1cae7..1c81f4d514bb 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -124,13 +124,6 @@ include $(FEATURES_DUMP) endif endif -ifeq ($(feature-disassembler-four-args), 1) -CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE -endif -ifeq ($(feature-disassembler-init-styled), 1) - CFLAGS += -DDISASM_INIT_STYLED -endif - LIBS = $(LIBBPF) -lelf -lz LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz ifeq ($(feature-libcap), 1) @@ -142,9 +135,7 @@ include $(wildcard $(OUTPUT)*.d) all: $(OUTPUT)bpftool -BFD_SRCS = jit_disasm.c - -SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) +SRCS := $(wildcard *.c) ifeq ($(feature-libbfd),1) LIBS += -lbfd -ldl -lopcodes @@ -154,9 +145,21 @@ else ifeq ($(feature-libbfd-liberty-z),1) LIBS += -lbfd -ldl -lopcodes -liberty -lz endif +# If one of the above feature combinations is set, we support libbfd ifneq ($(filter -lbfd,$(LIBS)),) -CFLAGS += -DHAVE_LIBBFD_SUPPORT -SRCS += $(BFD_SRCS) + CFLAGS += -DHAVE_LIBBFD_SUPPORT + + # Libbfd interface changed over time, figure out what we need + ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE + endif + ifeq ($(feature-disassembler-init-styled), 1) + CFLAGS += -DDISASM_INIT_STYLED + endif +endif +ifeq ($(filter -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) + # No support for JIT disassembly + SRCS := $(filter-out jit_disasm.c,$(SRCS)) endif HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ -- cgit v1.2.3-58-ga151 From e1947c750ffe5effa371d7b7bc72043f100f6a93 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:26 +0100 Subject: bpftool: Refactor disassembler for JIT-ed programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor disasm_print_insn() to extract the code specific to libbfd and move it to dedicated functions. There is no functional change. This is in preparation for supporting an alternative library for disassembling the instructions. 
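After the refactoring, disasm_print_insn() drives the disassembly through a small context API; the calling convention introduced in the diff below is roughly:

	disasm_ctx_t ctx;

	if (init_context(&ctx, arch, disassembler_options, image, len))
		return -1;
	/* ... per-instruction loop ... */
	count = disassemble_insn(&ctx, image, len, pc);
	/* ... */
	destroy_context(&ctx);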
Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Song Liu Link: https://lore.kernel.org/r/20221025150329.97371-6-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/jit_disasm.c | 133 +++++++++++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 723a9b799a0c..e31ad3950fd6 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -30,6 +30,14 @@ #include "json_writer.h" #include "main.h" +static int oper_count; + +typedef struct { + struct disassemble_info *info; + disassembler_ftype disassemble; + bfd *bfdf; +} disasm_ctx_t; + static int get_exec_path(char *tpath, size_t size) { const char *path = "/proc/self/exe"; @@ -44,7 +52,6 @@ static int get_exec_path(char *tpath, size_t size) return 0; } -static int oper_count; static int printf_json(void *out, const char *fmt, va_list ap) { char *s; @@ -102,46 +109,44 @@ static int fprintf_json_styled(void *out, return r; } -int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +static int init_context(disasm_ctx_t *ctx, const char *arch, + const char *disassembler_options, + unsigned char *image, ssize_t len) { - const struct bpf_line_info *linfo = NULL; - disassembler_ftype disassemble; - int count, i, pc = 0, err = -1; - struct disassemble_info info; - unsigned int nr_skip = 0; + struct disassemble_info *info; char tpath[PATH_MAX]; bfd *bfdf; - if (!len) - return -1; - memset(tpath, 0, sizeof(tpath)); if (get_exec_path(tpath, sizeof(tpath))) { p_err("failed to create disasembler (get_exec_path)"); return -1; } - bfdf = bfd_openr(tpath, NULL); - if (!bfdf) { + ctx->bfdf = bfd_openr(tpath, NULL); + if (!ctx->bfdf) { p_err("failed to create disassembler (bfd_openr)"); return -1; } - if (!bfd_check_format(bfdf, bfd_object)) { + if (!bfd_check_format(ctx->bfdf, bfd_object)) { p_err("failed to create disassembler (bfd_check_format)"); - goto exit_close; + goto err_close; } + bfdf = ctx->bfdf; + + ctx->info = malloc(sizeof(struct disassemble_info)); + if (!ctx->info) { + p_err("mem alloc failed"); + goto err_close; + } + info = ctx->info; if (json_output) - init_disassemble_info_compat(&info, stdout, + init_disassemble_info_compat(info, stdout, (fprintf_ftype) fprintf_json, fprintf_json_styled); else - init_disassemble_info_compat(&info, stdout, + init_disassemble_info_compat(info, stdout, (fprintf_ftype) fprintf, fprintf_styled); @@ -153,31 +158,76 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfdf->arch_info = inf; } else { p_err("No libbfd support for %s", arch); - goto exit_close; + goto err_free; } } - info.arch = bfd_get_arch(bfdf); - info.mach = bfd_get_mach(bfdf); + info->arch = bfd_get_arch(bfdf); + info->mach = bfd_get_mach(bfdf); if (disassembler_options) - info.disassembler_options = disassembler_options; - info.buffer = image; - info.buffer_length = len; + info->disassembler_options = disassembler_options; + info->buffer = image; + info->buffer_length = len; - disassemble_init_for_target(&info); + disassemble_init_for_target(info); #ifdef DISASM_FOUR_ARGS_SIGNATURE - disassemble = disassembler(info.arch, - bfd_big_endian(bfdf), - info.mach, - bfdf); + ctx->disassemble = disassembler(info->arch, + bfd_big_endian(bfdf), + 
info->mach, + bfdf); #else - disassemble = disassembler(bfdf); + ctx->disassemble = disassembler(bfdf); #endif - if (!disassemble) { + if (!ctx->disassemble) { p_err("failed to create disassembler"); - goto exit_close; + goto err_free; } + return 0; + +err_free: + free(info); +err_close: + bfd_close(ctx->bfdf); + return -1; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + free(ctx->info); + bfd_close(ctx->bfdf); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image, + __maybe_unused ssize_t len, int pc) +{ + return ctx->disassemble(pc, ctx->info); +} + +int disasm_init(void) +{ + bfd_init(); + return 0; +} + +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) +{ + const struct bpf_line_info *linfo = NULL; + unsigned int nr_skip = 0; + int count, i, pc = 0; + disasm_ctx_t ctx; + + if (!len) + return -1; + + if (init_context(&ctx, arch, disassembler_options, image, len)) + return -1; if (json_output) jsonw_start_array(json_wtr); @@ -205,7 +255,8 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, printf("%4x:\t", pc); } - count = disassemble(pc, &info); + count = disassemble_insn(&ctx, image, len, pc); + if (json_output) { /* Operand array, was started in fprintf_json. Before * that, make sure we have a _null_ value if no operand @@ -241,15 +292,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_end_array(json_wtr); - err = 0; - -exit_close: - bfd_close(bfdf); - return err; -} + destroy_context(&ctx); -int disasm_init(void) -{ - bfd_init(); return 0; } -- cgit v1.2.3-58-ga151 From eb9d1acf634baf6401dfb4f67dc895290713a357 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:27 +0100 Subject: bpftool: Add LLVM as default library for disassembling JIT-ed programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To disassemble instructions for JIT-ed programs, bpftool has relied on the libbfd library. This has been problematic in the past: libbfd's interface is not meant to be stable and has changed several times. For building bpftool, we have to detect how the libbfd version on the system behaves, which is why we have to handle features disassembler-four-args and disassembler-init-styled in the Makefile. When it comes to shipping bpftool, this has also caused issues with several distribution maintainers unwilling to support the feature (see for example Debian's page for binutils-dev, which ships libbfd: "Note that building Debian packages which depend on the shared libbfd is Not Allowed." [0]). For these reasons, we add support for LLVM as an alternative to libbfd for disassembling instructions of JIT-ed programs. Thanks to the preparation work in the previous commits, it's easy to add the library by passing the relevant compilation options in the Makefile, and by adding the functions for setting up the LLVM disassembler in file jit_disasm.c. The LLVM disassembler requires the LLVM development package (usually llvm-dev or llvm-devel). The expectation is that the interface for this disassembler will be more stable. 
There is a note in LLVM's Developer Policy [1] stating that the stability for the C API is "best effort" and not guaranteed, but at least there is some effort to keep compatibility when possible (which hasn't really been the case for libbfd so far). Furthermore, the Debian page for the related LLVM package does not caution against linking to the lib, as binutils-dev page does. Naturally, the display of disassembled instructions comes with a few minor differences. Here is a sample output with libbfd (already supported before this patch): # bpftool prog dump jited id 56 bpf_prog_6deef7357e7b4530: 0: nopl 0x0(%rax,%rax,1) 5: xchg %ax,%ax 7: push %rbp 8: mov %rsp,%rbp b: push %rbx c: push %r13 e: push %r14 10: mov %rdi,%rbx 13: movzwq 0xb4(%rbx),%r13 1b: xor %r14d,%r14d 1e: or $0x2,%r14d 22: mov $0x1,%eax 27: cmp $0x2,%r14 2b: jne 0x000000000000002f 2d: xor %eax,%eax 2f: pop %r14 31: pop %r13 33: pop %rbx 34: leave 35: ret LLVM supports several variants that we could set when initialising the disassembler, for example with: LLVMSetDisasmOptions(*ctx, LLVMDisassembler_Option_AsmPrinterVariant); but the default printer is used for now. Here is the output with LLVM: # bpftool prog dump jited id 56 bpf_prog_6deef7357e7b4530: 0: nopl (%rax,%rax) 5: nop 7: pushq %rbp 8: movq %rsp, %rbp b: pushq %rbx c: pushq %r13 e: pushq %r14 10: movq %rdi, %rbx 13: movzwq 180(%rbx), %r13 1b: xorl %r14d, %r14d 1e: orl $2, %r14d 22: movl $1, %eax 27: cmpq $2, %r14 2b: jne 0x2f 2d: xorl %eax, %eax 2f: popq %r14 31: popq %r13 33: popq %rbx 34: leave 35: retq The LLVM disassembler comes as the default choice, with libbfd as a fall-back. Of course, we could replace libbfd entirely and avoid supporting two different libraries. One reason for keeping libbfd is that, right now, it works well, we have all we need in terms of features detection in the Makefile, so it provides a fallback for disassembling JIT-ed programs if libbfd is installed but LLVM is not. The other motivation is that libbfd supports nfp instruction for Netronome's SmartNICs and can be used to disassemble offloaded programs, something that LLVM cannot do. If libbfd's interface breaks again in the future, we might reconsider keeping support for it. 
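For readers unfamiliar with the LLVM-C API, the flow used in jit_disasm.c reduces to the following self-contained sketch (error handling trimmed; dump_image() is a made-up helper name for illustration, the symbol lookup callback is omitted, and the build flags come from $(llvm-config --cflags --libs mcdisassembler all-targets)):

	#include <stdio.h>
	#include <llvm-c/Core.h>
	#include <llvm-c/Disassembler.h>
	#include <llvm-c/Target.h>
	#include <llvm-c/TargetMachine.h>

	static void dump_image(unsigned char *image, size_t len)
	{
		LLVMDisasmContextRef ctx;
		char *triple, insn[256];
		size_t pc = 0, cnt;

		LLVMInitializeNativeTarget();
		LLVMInitializeNativeDisassembler();

		triple = LLVMGetDefaultTargetTriple();
		ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, NULL);
		LLVMDisposeMessage(triple);
		if (!ctx)
			return;

		while (pc < len) {
			/* Decode one instruction, print it, advance. */
			cnt = LLVMDisasmInstruction(ctx, image + pc, len - pc,
						    pc, insn, sizeof(insn));
			if (!cnt)
				break;
			printf("%4zx:%s\n", pc, insn);
			pc += cnt;
		}
		LLVMDisasmDispose(ctx);
	}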
[0] https://packages.debian.org/buster/binutils-dev [1] https://llvm.org/docs/DeveloperPolicy.html#c-api-changes Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221025150329.97371-7-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 48 +++++++++++------- tools/bpf/bpftool/jit_disasm.c | 112 +++++++++++++++++++++++++++++++++++++++-- tools/bpf/bpftool/main.h | 4 +- 3 files changed, 141 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 1c81f4d514bb..787b857d3fb5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -95,6 +95,7 @@ RM ?= rm -f FEATURE_USER = .bpftool FEATURE_TESTS := clang-bpf-co-re +FEATURE_TESTS += llvm FEATURE_TESTS += libcap FEATURE_TESTS += libbfd FEATURE_TESTS += libbfd-liberty @@ -103,6 +104,7 @@ FEATURE_TESTS += disassembler-four-args FEATURE_TESTS += disassembler-init-styled FEATURE_DISPLAY := clang-bpf-co-re +FEATURE_DISPLAY += llvm FEATURE_DISPLAY += libcap FEATURE_DISPLAY += libbfd FEATURE_DISPLAY += libbfd-liberty @@ -137,27 +139,37 @@ all: $(OUTPUT)bpftool SRCS := $(wildcard *.c) -ifeq ($(feature-libbfd),1) - LIBS += -lbfd -ldl -lopcodes -else ifeq ($(feature-libbfd-liberty),1) - LIBS += -lbfd -ldl -lopcodes -liberty -else ifeq ($(feature-libbfd-liberty-z),1) - LIBS += -lbfd -ldl -lopcodes -liberty -lz -endif - -# If one of the above feature combinations is set, we support libbfd -ifneq ($(filter -lbfd,$(LIBS)),) - CFLAGS += -DHAVE_LIBBFD_SUPPORT - - # Libbfd interface changed over time, figure out what we need - ifeq ($(feature-disassembler-four-args), 1) - CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +ifeq ($(feature-llvm),1) + # If LLVM is available, use it for JIT disassembly + CFLAGS += -DHAVE_LLVM_SUPPORT + LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets + CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) +else + # Fall back on libbfd + ifeq ($(feature-libbfd),1) + LIBS += -lbfd -ldl -lopcodes + else ifeq ($(feature-libbfd-liberty),1) + LIBS += -lbfd -ldl -lopcodes -liberty + else ifeq ($(feature-libbfd-liberty-z),1) + LIBS += -lbfd -ldl -lopcodes -liberty -lz endif - ifeq ($(feature-disassembler-init-styled), 1) - CFLAGS += -DDISASM_INIT_STYLED + + # If one of the above feature combinations is set, we support libbfd + ifneq ($(filter -lbfd,$(LIBS)),) + CFLAGS += -DHAVE_LIBBFD_SUPPORT + + # Libbfd interface changed over time, figure out what we need + ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE + endif + ifeq ($(feature-disassembler-init-styled), 1) + CFLAGS += -DDISASM_INIT_STYLED + endif endif endif -ifeq ($(filter -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) +ifeq ($(filter -DHAVE_LLVM_SUPPORT -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) # No support for JIT disassembly SRCS := $(filter-out jit_disasm.c,$(SRCS)) endif diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index e31ad3950fd6..c28b21f90cb9 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -20,18 +20,123 @@ #include #include #include -#include -#include #include #include #include + +#ifdef HAVE_LLVM_SUPPORT +#include +#include +#include +#include +#endif + +#ifdef HAVE_LIBBFD_SUPPORT +#include +#include #include +#endif #include "json_writer.h" #include "main.h" 
static int oper_count; +#ifdef HAVE_LLVM_SUPPORT +#define DISASM_SPACER + +typedef LLVMDisasmContextRef disasm_ctx_t; + +static int printf_json(char *s) +{ + s = strtok(s, " \t"); + jsonw_string_field(json_wtr, "operation", s); + + jsonw_name(json_wtr, "operands"); + jsonw_start_array(json_wtr); + oper_count = 1; + + while ((s = strtok(NULL, " \t,()")) != 0) { + jsonw_string(json_wtr, s); + oper_count++; + } + return 0; +} + +/* This callback to set the ref_type is necessary to have the LLVM disassembler + * print PC-relative addresses instead of byte offsets for branch instruction + * targets. + */ +static const char * +symbol_lookup_callback(__maybe_unused void *disasm_info, + __maybe_unused uint64_t ref_value, + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, + __maybe_unused const char **ref_name) +{ + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} + +static int +init_context(disasm_ctx_t *ctx, const char *arch, + __maybe_unused const char *disassembler_options, + __maybe_unused unsigned char *image, __maybe_unused ssize_t len) +{ + char *triple; + + if (arch) { + p_err("Architecture %s not supported", arch); + return -1; + } + + triple = LLVMGetDefaultTargetTriple(); + if (!triple) { + p_err("Failed to retrieve triple"); + return -1; + } + *ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, symbol_lookup_callback); + LLVMDisposeMessage(triple); + + if (!*ctx) { + p_err("Failed to create disassembler"); + return -1; + } + + return 0; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + LLVMDisposeMessage(*ctx); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +{ + char buf[256]; + int count; + + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + buf, sizeof(buf)); + if (json_output) + printf_json(buf); + else + printf("%s", buf); + + return count; +} + +int disasm_init(void) +{ + LLVMInitializeNativeTarget(); + LLVMInitializeNativeDisassembler(); + return 0; +} +#endif /* HAVE_LLVM_SUPPORT */ + +#ifdef HAVE_LIBBFD_SUPPORT +#define DISASM_SPACER "\t" + typedef struct { struct disassemble_info *info; disassembler_ftype disassemble; @@ -210,6 +315,7 @@ int disasm_init(void) bfd_init(); return 0; } +#endif /* HAVE_LIBBPFD_SUPPORT */ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch, const char *disassembler_options, @@ -252,7 +358,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (linfo) btf_dump_linfo_plain(btf, linfo, "; ", linum); - printf("%4x:\t", pc); + printf("%4x:" DISASM_SPACER, pc); } count = disassemble_insn(&ctx, image, len, pc); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index c9e171082cf6..9a149c67aa5d 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -172,7 +172,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; -#ifdef HAVE_LIBBFD_SUPPORT +#if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch, const char *disassembler_options, const struct btf *btf, @@ -193,7 +193,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, } static inline int disasm_init(void) { - p_err("No libbfd support"); + p_err("No JIT disassembly support"); return -1; } #endif -- cgit v1.2.3-58-ga151 From ce4f660862359f2172ff4e3df30e641df5475cf6 Mon Sep 17 00:00:00 2001 From: Quentin 
Monnet Date: Tue, 25 Oct 2022 16:03:28 +0100 Subject: bpftool: Support setting alternative arch for JIT disasm with LLVM For offloaded BPF programs, instead of failing to create the LLVM disassembler without even looking for a triple at all, do run the function that attempts to retrieve a valid architecture name for the device. It will still fail for the LLVM disassembler, because currently we have no valid triple to return (NFP disassembly is not supported by LLVM). But failing in that function is more logical than to assume in jit_disasm.c that passing an "arch" name is simply not supported. Suggested-by: Song Liu Signed-off-by: Quentin Monnet Link: https://lore.kernel.org/r/20221025150329.97371-8-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/common.c | 10 ++++++---- tools/bpf/bpftool/jit_disasm.c | 15 +++++++-------- tools/bpf/bpftool/main.h | 3 +-- tools/bpf/bpftool/prog.c | 6 ++---- 4 files changed, 16 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 4c2e909a2d67..e4d33bc8bbbf 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -627,12 +627,11 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) } const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt) +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt) { + __maybe_unused int device_id; char devname[IF_NAMESIZE]; int vendor_id; - int device_id; if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { p_err("Can't get net device name for ifindex %d: %s", ifindex, @@ -647,6 +646,7 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, } switch (vendor_id) { +#ifdef HAVE_LIBBFD_SUPPORT case 0x19ee: device_id = read_sysfs_netdev_hex_int(devname, "device"); if (device_id != 0x4000 && @@ -655,8 +655,10 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); *opt = "ctx4"; return "NFP-6xxx"; +#endif /* HAVE_LIBBFD_SUPPORT */ + /* No NFP support in LLVM, we have no valid triple to return. 
*/ default: - p_err("Can't get bfd arch name for device vendor id 0x%04x", + p_err("Can't get arch name for device vendor id 0x%04x", vendor_id); return NULL; } diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index c28b21f90cb9..58a5017034a2 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -84,12 +84,10 @@ init_context(disasm_ctx_t *ctx, const char *arch, { char *triple; - if (arch) { - p_err("Architecture %s not supported", arch); - return -1; - } - - triple = LLVMGetDefaultTargetTriple(); + if (arch) + triple = LLVMNormalizeTargetTriple(arch); + else + triple = LLVMGetDefaultTargetTriple(); if (!triple) { p_err("Failed to retrieve triple"); return -1; @@ -128,8 +126,9 @@ disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) int disasm_init(void) { - LLVMInitializeNativeTarget(); - LLVMInitializeNativeDisassembler(); + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); return 0; } #endif /* HAVE_LLVM_SUPPORT */ diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9a149c67aa5d..467d8472df0c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -203,8 +203,7 @@ void print_hex_data_json(uint8_t *data, size_t len); unsigned int get_page_size(void); unsigned int get_possible_cpus(void); const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt); +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt); struct btf_dumper { const struct btf *btf; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 93dfb89b85e3..a858b907da16 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -764,10 +764,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, const char *name = NULL; if (info->ifindex) { - name = ifindex_to_bfd_params(info->ifindex, - info->netns_dev, - info->netns_ino, - &disasm_opt); + name = ifindex_to_arch(info->ifindex, info->netns_dev, + info->netns_ino, &disasm_opt); if (!name) goto exit_free; } -- cgit v1.2.3-58-ga151 From 08b8191ba7f99cd1444001e9f4800ff27607d054 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 25 Oct 2022 16:03:29 +0100 Subject: bpftool: Add llvm feature to "bpftool version" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to "libbfd", add a "llvm" feature to the output of command "bpftool version" to indicate that LLVM is used for disassembling JIT-ed programs. This feature is mutually exclusive (from Makefile definitions) with "libbfd". Signed-off-by: Quentin Monnet Tested-by: Niklas Söderlund Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221025150329.97371-9-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/common_options.rst | 8 ++++---- tools/bpf/bpftool/main.c | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 4107a586b68b..05350a1aadf9 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -7,10 +7,10 @@ Print bpftool's version number (similar to **bpftool version**), the number of the libbpf version in use, and optional features that were included when bpftool was compiled. 
Optional features include linking - against libbfd to provide the disassembler for JIT-ted programs - (**bpftool prog dump jited**) and usage of BPF skeletons (some - features like **bpftool prog profile** or showing pids associated to - BPF objects may rely on it). + against LLVM or libbfd to provide the disassembler for JIT-ted + programs (**bpftool prog dump jited**) and usage of BPF skeletons + (some features like **bpftool prog profile** or showing pids + associated to BPF objects may rely on it). -j, --json Generate JSON output. For commands that cannot produce JSON, this diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index b22223df4431..741e50ee0b6c 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -119,6 +119,11 @@ static int do_version(int argc, char **argv) #else const bool has_libbfd = false; #endif +#ifdef HAVE_LLVM_SUPPORT + const bool has_llvm = true; +#else + const bool has_llvm = false; +#endif #ifdef BPFTOOL_WITHOUT_SKELETONS const bool has_skeletons = false; #else @@ -154,6 +159,7 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); + jsonw_bool_field(json_wtr, "llvm", has_llvm); jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); jsonw_bool_field(json_wtr, "bootstrap", bootstrap); @@ -172,6 +178,7 @@ static int do_version(int argc, char **argv) printf("using libbpf %s\n", libbpf_version_string()); printf("features:"); print_feature("libbfd", has_libbfd, &nb_features); + print_feature("llvm", has_llvm, &nb_features); print_feature("libbpf_strict", !legacy_libbpf, &nb_features); print_feature("skeletons", has_skeletons, &nb_features); print_feature("bootstrap", bootstrap, &nb_features); -- cgit v1.2.3-58-ga151 From 10705b2b7a8e4eb46ab5bf1b9ee354cb9a929428 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Oct 2022 15:41:45 +0200 Subject: selftests/bpf: Add load_kallsyms_refresh function Adding a load_kallsyms_refresh function to re-read symbols from the /proc/kallsyms file. This will be needed to get proper function addresses from the bpf_testmod.ko module, which is loaded/unloaded several times during the test run, so the symbols might already be stale when we need to use them. Acked-by: Song Liu Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221025134148.3300700-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/trace_helpers.c | 20 +++++++++++++------- tools/testing/selftests/bpf/trace_helpers.h | 2 ++ 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9c4be2cdb21a..09a16a77bae4 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -23,7 +23,7 @@ static int ksym_cmp(const void *p1, const void *p2) return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; } -int load_kallsyms(void) +int load_kallsyms_refresh(void) { FILE *f; char func[256], buf[256]; @@ -31,12 +31,7 @@ int load_kallsyms(void) void *addr; int i = 0; - /* - * This is called/used from multiplace places, - * load symbols just once.
- */ - if (sym_cnt) - return 0; + sym_cnt = 0; f = fopen("/proc/kallsyms", "r"); if (!f) @@ -57,6 +52,17 @@ int load_kallsyms(void) return 0; } +int load_kallsyms(void) +{ + /* + * This is called/used from multiple places, + * load symbols just once. + */ + if (sym_cnt) + return 0; + return load_kallsyms_refresh(); +} + struct ksym *ksym_search(long key) { int start = 0, end = sym_cnt; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 238a9c98cde2..53efde0e2998 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -10,6 +10,8 @@ struct ksym { }; int load_kallsyms(void); +int load_kallsyms_refresh(void); + struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); -- cgit v1.2.3-58-ga151 From fee356ede980b6c2c8db612e18b25738356d6744 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Oct 2022 15:41:46 +0200 Subject: selftests/bpf: Add bpf_testmod_fentry_* functions Adding 3 bpf_testmod_fentry_* functions to have a way to test kprobe multi links on a kernel module. They follow the bpf_fentry_test* function prototypes/code. Adding equivalent functions for all bpf_fentry_test* functions does not seem necessary at the moment; more could be added later. Acked-by: Song Liu Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221025134148.3300700-7-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a6021d6117b5..5085fea3cac5 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -128,6 +128,23 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg) } } +noinline int bpf_testmod_fentry_test1(int a) +{ + return a + 1; +} + +noinline int bpf_testmod_fentry_test2(int a, u64 b) +{ + return a + b; +} + +noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +int bpf_testmod_fentry_ok; + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -167,6 +184,13 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, return snprintf(buf, len, "%d\n", writable.val); } + if (bpf_testmod_fentry_test1(1) != 2 || + bpf_testmod_fentry_test2(2, 3) != 5 || + bpf_testmod_fentry_test3(4, 5, 6) != 15) + goto out; + + bpf_testmod_fentry_ok = 1; +out: return -EIO; /* always fail */ } EXPORT_SYMBOL(bpf_testmod_test_read); -- cgit v1.2.3-58-ga151 From e697d8dcebd2f557fa5e5ed57aaf0a9992ce9df8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Oct 2022 15:41:47 +0200 Subject: selftests/bpf: Add kprobe_multi check to module attach test Adding a test that makes sure the kernel module won't be removed if there's a kprobe multi link defined on top of it.
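For illustration, the core of the check can be sketched standalone (a minimal sketch, not part of this patch; the prog name is a placeholder and error handling is reduced to return codes):

  #include <errno.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <bpf/libbpf.h>

  static int delete_module(const char *name, int flags)
  {
  	return syscall(__NR_delete_module, name, flags);
  }

  /* While a kprobe multi link references bpf_testmod symbols, the
   * module is pinned, so delete_module() is expected to fail.
   */
  static int check_module_pinned(struct bpf_program *prog)
  {
  	struct bpf_link *link = bpf_program__attach(prog);
  	int err;

  	if (!link)
  		return -errno;
  	err = delete_module("bpf_testmod", 0);	/* must fail while attached */
  	bpf_link__destroy(link);
  	return err ? 0 : -1;
  }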
Acked-by: Song Liu Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221025134148.3300700-8-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/module_attach.c | 7 +++++++ tools/testing/selftests/bpf/progs/test_module_attach.c | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d0e50dcf47c..7fc01ff490db 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -103,6 +103,13 @@ void test_module_attach(void) ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); bpf_link__destroy(link); + link = bpf_program__attach(skel->progs.kprobe_multi); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) + goto cleanup; + + ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + bpf_link__destroy(link); + cleanup: test_module_attach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 08628afedb77..8a1b50f3a002 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -110,4 +110,10 @@ int BPF_PROG(handle_fmod_ret, return 0; /* don't override the exit code */ } +SEC("kprobe.multi/bpf_testmod_test_read") +int BPF_PROG(kprobe_multi) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-58-ga151 From b2440443a64f1b687df364089e6dcb23da4f9598 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Oct 2022 15:41:48 +0200 Subject: selftests/bpf: Add kprobe_multi kmod attach api tests Adding kprobe_multi kmod attach api tests that attach bpf_testmod functions via bpf_program__attach_kprobe_multi_opts. Running it as a serial test because we don't want other tests to reload bpf_testmod while it's running.
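For reference, the attach API exercised by these tests accepts either symbol names or pre-resolved addresses through bpf_kprobe_multi_opts (a minimal sketch mirroring the test below; the prog and link variables are placeholders and error handling is elided):

  LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
  const char *syms[] = {
  	"bpf_testmod_fentry_test1",
  	"bpf_testmod_fentry_test2",
  };
  struct bpf_link *link;

  opts.syms = syms;	/* or set opts.addrs from ksym_get_addr() */
  opts.cnt = 2;
  link = bpf_program__attach_kprobe_multi_opts(prog, NULL /* no pattern */, &opts);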
Acked-by: Song Liu Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221025134148.3300700-9-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/kprobe_multi_testmod_test.c | 89 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/kprobe_multi.c | 50 ++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c new file mode 100644 index 000000000000..1fbe7e4ac00a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "kprobe_multi.skel.h" +#include "trace_helpers.h" +#include "bpf/libbpf_internal.h" + +static void kprobe_multi_testmod_check(struct kprobe_multi *skel) +{ + ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test2_result, 1, "kprobe_test2_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test3_result, 1, "kprobe_test3_result"); + + ASSERT_EQ(skel->bss->kretprobe_testmod_test1_result, 1, "kretprobe_test1_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test2_result, 1, "kretprobe_test2_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test3_result, 1, "kretprobe_test3_result"); +} + +static void test_testmod_attach_api(struct bpf_kprobe_multi_opts *opts) +{ + struct kprobe_multi *skel = NULL; + + skel = kprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load")) + return; + + skel->bss->pid = getpid(); + + skel->links.test_kprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kprobe_testmod, + NULL, opts); + if (!skel->links.test_kprobe_testmod) + goto cleanup; + + opts->retprobe = true; + skel->links.test_kretprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kretprobe_testmod, + NULL, opts); + if (!skel->links.test_kretprobe_testmod) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + kprobe_multi_testmod_check(skel); + +cleanup: + kprobe_multi__destroy(skel); +} + +static void test_testmod_attach_api_addrs(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + unsigned long long addrs[3]; + + addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1"); + ASSERT_NEQ(addrs[0], 0, "ksym_get_addr"); + addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2"); + ASSERT_NEQ(addrs[1], 0, "ksym_get_addr"); + addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3"); + ASSERT_NEQ(addrs[2], 0, "ksym_get_addr"); + + opts.addrs = (const unsigned long *) addrs; + opts.cnt = ARRAY_SIZE(addrs); + + test_testmod_attach_api(&opts); +} + +static void test_testmod_attach_api_syms(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *syms[3] = { + "bpf_testmod_fentry_test1", + "bpf_testmod_fentry_test2", + "bpf_testmod_fentry_test3", + }; + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + test_testmod_attach_api(&opts); +} + +void serial_test_kprobe_multi_testmod_test(void) +{ + if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + return; + + if (test__start_subtest("testmod_attach_api_syms")) + test_testmod_attach_api_syms(); + if (test__start_subtest("testmod_attach_api_addrs")) + test_testmod_attach_api_addrs(); +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 
98c3399e15c0..9e1ca8e34913 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -110,3 +110,53 @@ int test_kretprobe_manual(struct pt_regs *ctx) kprobe_multi_check(ctx, true); return 0; } + +extern const void bpf_testmod_fentry_test1 __ksym; +extern const void bpf_testmod_fentry_test2 __ksym; +extern const void bpf_testmod_fentry_test3 __ksym; + +__u64 kprobe_testmod_test1_result = 0; +__u64 kprobe_testmod_test2_result = 0; +__u64 kprobe_testmod_test3_result = 0; + +__u64 kretprobe_testmod_test1_result = 0; +__u64 kretprobe_testmod_test2_result = 0; +__u64 kretprobe_testmod_test3_result = 0; + +static void kprobe_multi_testmod_check(void *ctx, bool is_return) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + __u64 addr = bpf_get_func_ip(ctx); + + if (is_return) { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kretprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kretprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kretprobe_testmod_test3_result = 1; + } else { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kprobe_testmod_test3_result = 1; + } +} + +SEC("kprobe.multi") +int test_kprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, false); + return 0; +} + +SEC("kretprobe.multi") +int test_kretprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, true); + return 0; +} -- cgit v1.2.3-58-ga151 From f3c51fe02c55bd944662714e5b91b96dc271ad9f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 24 Oct 2022 15:38:29 +0100 Subject: libbpf: Btf dedup identical struct test needs check for nested structs/arrays When examining module BTF, it is common to see core kernel structures such as sk_buff and net_device duplicated in the module. After adding debug messaging to BTF, it turned out that much of the problem was down to the identical struct test failing during deduplication; sometimes the compiler adds identical structs. However, it turns out that the type ids of identical struct members can sometimes differ, even when the containing structs are still identical. To take an example, for struct sk_buff, debug messaging revealed that the identical struct matching was failing for the anon struct "headers"; specifically for the first field: __u8 __pkt_type_offset[0]; /* 128 0 */ Looking at the code in BTF deduplication, we have code that guards against the possibility of identical struct definitions, down to type ids, and identical array definitions. However, in this case we have a struct which is defined twice but does not have identical type ids, since each duplicate struct has separate type ids for the above array member. A similar problem (though not observed) could occur for struct-in-struct. The solution is to make the "identical struct" test check members not just for matching ids, but also to check whether they in turn are identical structs or arrays. The results of doing this are quite dramatic (for some modules at least); I see the number of type ids drop from around 10000 to just over 1000 in one module, for example. For testing, use the latest pahole or apply [1]; otherwise dedups can fail for the reasons described there.
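To illustrate the failure mode (all type ids below are invented for the example), the duplicated BTF looks roughly like this:

  /* Two byte-for-byte identical "headers" structs whose zero-length
   * array member was assigned a distinct type id in each copy:
   *
   *   [10] STRUCT 'headers'  member '__pkt_type_offset' type_id=11
   *   [11] ARRAY  '(anon)'   elem '__u8', nr_elems=0
   *   [20] STRUCT 'headers'  member '__pkt_type_offset' type_id=21
   *   [21] ARRAY  '(anon)'   elem '__u8', nr_elems=0
   *
   * Before this patch, btf_dedup_identical_structs() bailed out on
   * m1->type (11) != m2->type (21) even though [11] and [21] are
   * equal arrays, so both struct copies survived deduplication.
   */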
Also fix the return type of btf_dedup_identical_arrays(), as suggested by Andrii, to match the boolean return type used elsewhere. Fixes: efdd3eb8015e ("libbpf: Accommodate DWARF/compiler bug with duplicated structs") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1666622309-22289-1-git-send-email-alan.maguire@oracle.com [1] https://lore.kernel.org/bpf/1666364523-9648-1-git-send-email-alan.maguire --- tools/lib/bpf/btf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d88647da2c7f..675a0df5c840 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -3887,14 +3887,14 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) } /* Check if given two types are identical ARRAY definitions */ -static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) { struct btf_type *t1, *t2; t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); if (!btf_is_array(t1) || !btf_is_array(t2)) - return 0; + return false; return btf_equal_array(t1, t2); } @@ -3918,7 +3918,9 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id m1 = btf_members(t1); m2 = btf_members(t2); for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type) + if (m1->type != m2->type && + !btf_dedup_identical_arrays(d, m1->type, m2->type) && + !btf_dedup_identical_structs(d, m1->type, m2->type)) return false; } return true; -- cgit v1.2.3-58-ga151 From 0334b4d8822a22b3593aec7361c50e9ebc31ee88 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:23 -0700 Subject: selftests/bpf: Ensure no task storage failure for bpf_lsm.s prog due to deadlock detection This patch adds a test to check for deadlock failure in bpf_task_storage_{get,delete} when called by a sleepable bpf_lsm prog. It also checks that prog_info.recursion_misses stays zero. The test starts with 32 threads that are all affinitized to one cpu. In my qemu setup, with CONFIG_PREEMPT=y, I can reproduce it within one second if it is run without the previous patches of this set.
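The reproduction boils down to pinning the whole process to one cpu so that a sleepable prog is preempted in the middle of a storage operation (a minimal sketch of the affinity setup, assuming _GNU_SOURCE and <sched.h>):

  cpu_set_t set;

  CPU_ZERO(&set);
  CPU_SET(0, &set);	/* all 32 threads then contend on cpu 0 */
  if (sched_setaffinity(getpid(), sizeof(set), &set))
  	return;		/* cannot pin: reproduction would be unreliable */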
Here is the test error message before adding the no deadlock detection version of the bpf_task_storage_{get,delete}: test_nodeadlock:FAIL:bpf_task_storage_get busy unexpected bpf_task_storage_get busy: actual 2 != expected 0 test_nodeadlock:FAIL:bpf_task_storage_delete busy unexpected bpf_task_storage_delete busy: actual 2 != expected 0 Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-9-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/task_local_storage.c | 98 ++++++++++++++++++++++ .../selftests/bpf/progs/task_storage_nodeadlock.c | 47 +++++++++++ 2 files changed, 145 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 99a42a2b6e14..ae535f5de6a2 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -3,12 +3,15 @@ #define _GNU_SOURCE /* See feature_test_macros(7) */ #include +#include +#include #include /* For SYS_xxx definitions */ #include #include #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" +#include "task_storage_nodeadlock.skel.h" static void test_sys_enter_exit(void) { @@ -93,6 +96,99 @@ out: task_ls_recursion__destroy(skel); } +static bool stop; + +static void waitall(const pthread_t *tids, int nr) +{ + int i; + + stop = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +static void *sock_create_loop(void *arg) +{ + struct task_storage_nodeadlock *skel = arg; + int fd; + + while (!stop) { + fd = socket(AF_INET, SOCK_STREAM, 0); + close(fd); + if (skel->bss->nr_get_errs || skel->bss->nr_del_errs) + stop = true; + } + + return NULL; +} + +static void test_nodeadlock(void) +{ + struct task_storage_nodeadlock *skel; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const int nr_threads = 32; + pthread_t tids[nr_threads]; + int i, prog_fd, err; + cpu_set_t old, new; + + /* Pin all threads to one cpu to increase the chance of preemption + * in a sleepable bpf prog. + */ + CPU_ZERO(&new); + CPU_SET(0, &new); + err = sched_getaffinity(getpid(), sizeof(old), &old); + if (!ASSERT_OK(err, "getaffinity")) + return; + err = sched_setaffinity(getpid(), sizeof(new), &new); + if (!ASSERT_OK(err, "setaffinity")) + return; + + skel = task_storage_nodeadlock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto done; + + /* Unnecessary recursion and deadlock detection are reproducible + * in the preemptible kernel. + */ + if (!skel->kconfig->CONFIG_PREEMPT) { + test__skip(); + goto done; + } + + err = task_storage_nodeadlock__attach(skel); + ASSERT_OK(err, "attach prog"); + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&tids[i], NULL, sock_create_loop, skel); + if (err) { + /* Only assert once here to avoid excessive + * PASS printing during test failure. 
+ */ + ASSERT_OK(err, "pthread_create"); + waitall(tids, i); + goto done; + } + } + + /* With 32 threads, 1s is enough to reproduce the issue */ + sleep(1); + waitall(tids, nr_threads); + + info_len = sizeof(info); + prog_fd = bpf_program__fd(skel->progs.socket_post_create); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "prog recursion"); + + ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); + +done: + task_storage_nodeadlock__destroy(skel); + sched_setaffinity(getpid(), sizeof(old), &old); +} + void test_task_local_storage(void) { if (test__start_subtest("sys_enter_exit")) @@ -101,4 +197,6 @@ void test_task_local_storage(void) test_exit_creds(); if (test__start_subtest("recursion")) test_recursion(); + if (test__start_subtest("nodeadlock")) + test_nodeadlock(); } diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c new file mode 100644 index 000000000000..ea2dbb80f7b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +#ifndef EBUSY +#define EBUSY 16 +#endif + +extern bool CONFIG_PREEMPT __kconfig __weak; +int nr_get_errs = 0; +int nr_del_errs = 0; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +SEC("lsm.s/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct task_struct *task; + int ret, zero = 0; + int *value; + + if (!CONFIG_PREEMPT) + return 0; + + task = bpf_get_current_task_btf(); + value = bpf_task_storage_get(&task_storage, task, &zero, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!value) + __sync_fetch_and_add(&nr_get_errs, 1); + + ret = bpf_task_storage_delete(&task_storage, + bpf_get_current_task_btf()); + if (ret == -EBUSY) + __sync_fetch_and_add(&nr_del_errs, 1); + + return 0; +} -- cgit v1.2.3-58-ga151 From 387b532138eed5b12e1afa68cafb6a389507310f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:24 -0700 Subject: selftests/bpf: Tracing prog can still do lookup under busy lock This patch modifies the task_ls_recursion test to check that the first bpf_task_storage_get(&map_a, ...) in BPF_PROG(on_update) can still do the lockless lookup even if it cannot acquire the percpu busy lock. If the lookup succeeds, it will increment the value by 1 and the value in the task storage map_a will become 200+1=201. After that, BPF_PROG(on_update) tries to delete from map_a and should get -EBUSY because it cannot acquire the percpu busy lock after finding the data.
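In short, after one triggered sys_enter the expected end state is (a recap using the assertions from the test below):

  /* on_enter stores 200 in map_a; on_update's lockless lookup then
   * increments it once => 201, and the delete attempt under the busy
   * lock fails exactly once with -EBUSY.
   */
  ASSERT_EQ(value, 201, "map_a value");
  ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");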
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-10-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/task_local_storage.c | 48 +++++++++++++++++++++- .../selftests/bpf/progs/task_ls_recursion.c | 43 +++++++++++++++++-- 2 files changed, 86 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index ae535f5de6a2..a176bd75a748 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -8,6 +8,7 @@ #include /* For SYS_xxx definitions */ #include #include +#include "task_local_storage_helpers.h" #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" @@ -78,21 +79,64 @@ out: static void test_recursion(void) { + int err, map_fd, prog_fd, task_fd; struct task_ls_recursion *skel; - int err; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + long value; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open")) + return; skel = task_ls_recursion__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) - return; + goto out; err = task_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ + skel->bss->test_pid = getpid(); syscall(SYS_gettid); + skel->bss->test_pid = 0; + task_ls_recursion__detach(skel); + + /* Refer to the comment in BPF_PROG(on_update) for + * the explanation on the value 201 and 100. + */ + map_fd = bpf_map__fd(skel->maps.map_a); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_a"); + ASSERT_EQ(value, 201, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + + map_fd = bpf_map__fd(skel->maps.map_b); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_b"); + ASSERT_EQ(value, 100, "map_b value"); + + prog_fd = bpf_program__fd(skel->progs.on_lookup); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_update); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_enter); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion"); out: + close(task_fd); task_ls_recursion__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index 564583dca7c8..4542dc683b44 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -5,7 +5,13 @@ #include #include +#ifndef EBUSY +#define EBUSY 16 +#endif + char _license[] SEC("license") = "GPL"; +int nr_del_errs = 0; +int test_pid = 0; struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); @@ -26,6 +32,13 @@ int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != 
test_pid) + return 0; + + /* The bpf_task_storage_delete will call + * bpf_local_storage_lookup. The prog->active will + * stop the recursion. + */ bpf_task_storage_delete(&map_a, task); bpf_task_storage_delete(&map_b, task); return 0; @@ -37,11 +50,32 @@ int BPF_PROG(on_update) struct task_struct *task = bpf_get_current_task_btf(); long *ptr; + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + /* ptr will not be NULL when it is called from + * the bpf_task_storage_get(&map_b,...F_CREATE) in + * the BPF_PROG(on_enter) below. It is because + * the value can be found in map_a and the kernel + * does not need to acquire any spin_lock. + */ + if (ptr) { + int err; + + *ptr += 1; + err = bpf_task_storage_delete(&map_a, task); + if (err == -EBUSY) + nr_del_errs++; + } + + /* This will still fail because map_b is empty and + * this BPF_PROG(on_update) has failed to acquire + * the percpu busy lock => meaning potential + * deadlock is detected and it will fail to create + * new storage. + */ ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) @@ -57,14 +91,17 @@ int BPF_PROG(on_enter, struct pt_regs *regs, long id) long *ptr; task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 200; ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 100; return 0; } -- cgit v1.2.3-58-ga151 From c4bcfb38a95edb1021a53f2d0356a78120ecfbe4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:28:50 -0700 Subject: bpf: Implement cgroup storage available to non-cgroup-attached bpf progs Similar to sk/inode/task storage, implement cgroup local storage. There already exists a local storage implementation for cgroup-attached bpf programs. See map type BPF_MAP_TYPE_CGROUP_STORAGE and helper bpf_get_local_storage(). But there are use cases where non-cgroup-attached bpf progs want to access cgroup local storage data. For example, a tc egress prog has access to the sk and the cgroup. It is possible to use sk local storage to emulate cgroup local storage by storing data in the socket. But this is wasteful, as there could be lots of sockets belonging to a particular cgroup. Alternatively, a separate map can be created with the cgroup id as the key. But this will introduce additional overhead to manipulate the new map. A cgroup local storage, similar to the existing sk/inode/task storage, should help for this use case. The life-cycle of the storage is tied to the life-cycle of the cgroup struct, i.e. the storage is destroyed along with the owning cgroup, with a call to bpf_cgrp_storage_free() when the cgroup itself is deleted. The userspace map operations can be done by using a cgroup fd as a key passed to the lookup, update and delete operations. Typically, the following code is used to get the current cgroup: struct task_struct *task = bpf_get_current_task_btf(); ... task->cgroups->dfl_cgrp ... and in the task_struct definition: struct task_struct { .... struct css_set __rcu *cgroups; .... } With a sleepable program, accessing task->cgroups is not protected by rcu_read_lock. So the current implementation only supports non-sleepable programs; supporting sleepable programs will be the next step, together with adding rcu_read_lock protection for rcu-tagged structures.
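To make the intended usage concrete, a minimal non-sleepable prog using the new map type could look as follows (a sketch based on the API added in this patch; the usual vmlinux.h/bpf_helpers.h/bpf_tracing.h includes are elided and the map/prog names are illustrative):

  /* Per-cgroup syscall counter keyed by the task's default cgroup. */
  struct {
  	__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
  	__uint(map_flags, BPF_F_NO_PREALLOC);
  	__type(key, int);
  	__type(value, long);
  } cgrp_counter SEC(".maps");

  SEC("tp_btf/sys_enter")
  int BPF_PROG(count_syscalls, struct pt_regs *regs, long id)
  {
  	struct task_struct *task = bpf_get_current_task_btf();
  	long *cnt;

  	cnt = bpf_cgrp_storage_get(&cgrp_counter, task->cgroups->dfl_cgrp,
  				   0, BPF_LOCAL_STORAGE_GET_F_CREATE);
  	if (cnt)
  		__sync_fetch_and_add(cnt, 1);
  	return 0;
  }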
Since the map name BPF_MAP_TYPE_CGROUP_STORAGE has been used for the old cgroup local storage support, the new map name BPF_MAP_TYPE_CGRP_STORAGE is used for cgroup storage available to non-cgroup-attached bpf programs. The old cgroup storage supports the bpf_get_local_storage() helper to get the cgroup data. The new cgroup storage helper bpf_cgrp_storage_get() can provide similar functionality. While the old cgroup storage pre-allocates storage memory, the new mechanism can also pre-allocate with a user space bpf_map_update_elem() call to avoid potential run-time memory allocation failure. Therefore, the new cgroup storage can provide all functionality w.r.t. the old one. So in uapi bpf.h, the old BPF_MAP_TYPE_CGROUP_STORAGE is aliased to BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED to indicate that the old cgroup storage can be deprecated, since the new one can provide the same functionality. Acked-by: David Vernet Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042850.673791-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 ++ include/linux/bpf_types.h | 1 + include/linux/cgroup-defs.h | 4 + include/uapi/linux/bpf.h | 50 ++++++++- kernel/bpf/Makefile | 2 +- kernel/bpf/bpf_cgrp_storage.c | 247 +++++++++++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 6 + kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 13 ++- kernel/cgroup/cgroup.c | 1 + kernel/trace/bpf_trace.c | 4 + scripts/bpf_doc.py | 2 + tools/include/uapi/linux/bpf.h | 50 ++++++++- 13 files changed, 385 insertions(+), 5 deletions(-) create mode 100644 kernel/bpf/bpf_cgrp_storage.c (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a5dbac8f5aba..9fd68b0b3e9c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2041,6 +2041,7 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); +void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, @@ -2295,6 +2296,10 @@ static inline bool has_current_bpf_ctx(void) static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) { } + +static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2535,6 +2540,8 @@ extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_get_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2c6a4f2562a7..d4ee3ccd3753 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -86,6 +86,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGRP_STORAGE, cgrp_storage_map_ops) #endif #ifdef CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) diff --git
a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8f481d1b159a..c466fdc3a32a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -504,6 +504,10 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_local_storage __rcu *bpf_cgrp_storage; +#endif + /* All ancestors including self */ struct cgroup *ancestors[]; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 17f61338f8f8..94659f6b3395 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -5435,6 +5443,44 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5647,6 +5693,8 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ FN(ktime_get_tai_ns, 208, ##ctx) \ FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 341c94f208f4..3a12e6b400a2 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -25,7 +25,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif ifeq ($(CONFIG_CGROUPS),y) -obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o +obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o bpf_cgrp_storage.o endif obj-$(CONFIG_CGROUP_BPF) += cgroup.o ifeq ($(CONFIG_INET),y) diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c new file mode 100644 index 000000000000..309403800f82 --- /dev/null +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. + */ + +#include +#include +#include +#include +#include + +DEFINE_BPF_STORAGE_CACHE(cgroup_cache); + +static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy); + +static void bpf_cgrp_storage_lock(void) +{ + migrate_disable(); + this_cpu_inc(bpf_cgrp_storage_busy); +} + +static void bpf_cgrp_storage_unlock(void) +{ + this_cpu_dec(bpf_cgrp_storage_busy); + migrate_enable(); +} + +static bool bpf_cgrp_storage_trylock(void) +{ + migrate_disable(); + if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) { + this_cpu_dec(bpf_cgrp_storage_busy); + migrate_enable(); + return false; + } + return true; +} + +static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) +{ + struct cgroup *cg = owner; + + return &cg->bpf_cgrp_storage; +} + +void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ + struct bpf_local_storage *local_storage; + bool free_cgroup_storage = false; + unsigned long flags; + + rcu_read_lock(); + local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + bpf_cgrp_storage_lock(); + raw_spin_lock_irqsave(&local_storage->lock, flags); + free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage); + raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_cgrp_storage_unlock(); + rcu_read_unlock(); + + if (free_cgroup_storage) + kfree_rcu(local_storage, rcu); +} + +static struct bpf_local_storage_data * +cgroup_storage_lookup(struct cgroup *cgroup, struct bpf_map *map, bool cacheit_lockit) +{ + struct bpf_local_storage *cgroup_storage; + struct bpf_local_storage_map *smap; + + cgroup_storage = rcu_dereference_check(cgroup->bpf_cgrp_storage, + bpf_rcu_lock_held()); + if (!cgroup_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(cgroup_storage, smap, cacheit_lockit); +} + +static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct cgroup *cgroup; + int fd; + + fd = *(int *)key; + cgroup = cgroup_get_from_fd(fd); + if (IS_ERR(cgroup)) + return ERR_CAST(cgroup); + + bpf_cgrp_storage_lock(); + sdata = cgroup_storage_lookup(cgroup, map, true); + bpf_cgrp_storage_unlock(); + cgroup_put(cgroup); + return sdata ? 
sdata->data : NULL; +} + +static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct cgroup *cgroup; + int fd; + + fd = *(int *)key; + cgroup = cgroup_get_from_fd(fd); + if (IS_ERR(cgroup)) + return PTR_ERR(cgroup); + + bpf_cgrp_storage_lock(); + sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, + value, map_flags, GFP_ATOMIC); + bpf_cgrp_storage_unlock(); + cgroup_put(cgroup); + return PTR_ERR_OR_ZERO(sdata); +} + +static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = cgroup_storage_lookup(cgroup, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata), true); + return 0; +} + +static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct cgroup *cgroup; + int err, fd; + + fd = *(int *)key; + cgroup = cgroup_get_from_fd(fd); + if (IS_ERR(cgroup)) + return PTR_ERR(cgroup); + + bpf_cgrp_storage_lock(); + err = cgroup_storage_delete(cgroup, map); + bpf_cgrp_storage_unlock(); + cgroup_put(cgroup); + return err; +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) +{ + return bpf_local_storage_map_alloc(attr, &cgroup_cache); +} + +static void cgroup_storage_map_free(struct bpf_map *map) +{ + bpf_local_storage_map_free(map, &cgroup_cache, NULL); +} + +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup, + void *, value, u64, flags, gfp_t, gfp_flags) +{ + struct bpf_local_storage_data *sdata; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + if (!cgroup) + return (unsigned long)NULL; + + if (!bpf_cgrp_storage_trylock()) + return (unsigned long)NULL; + + sdata = cgroup_storage_lookup(cgroup, map, true); + if (sdata) + goto unlock; + + /* only allocate new storage, when the cgroup is refcounted */ + if (!percpu_ref_is_dying(&cgroup->self.refcnt) && + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) + sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, + value, BPF_NOEXIST, gfp_flags); + +unlock: + bpf_cgrp_storage_unlock(); + return IS_ERR_OR_NULL(sdata) ? 
(unsigned long)NULL : (unsigned long)sdata->data; +} + +BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup) +{ + int ret; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!cgroup) + return -EINVAL; + + if (!bpf_cgrp_storage_trylock()) + return -EBUSY; + + ret = cgroup_storage_delete(cgroup, map); + bpf_cgrp_storage_unlock(); + return ret; +} + +BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_local_storage_map) +const struct bpf_map_ops cgrp_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = cgroup_storage_map_alloc, + .map_free = cgroup_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_cgrp_storage_lookup_elem, + .map_update_elem = bpf_cgrp_storage_update_elem, + .map_delete_elem = bpf_cgrp_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_id = &cgroup_storage_map_btf_ids[0], + .map_owner_storage_ptr = cgroup_storage_ptr, +}; + +const struct bpf_func_proto bpf_cgrp_storage_get_proto = { + .func = bpf_cgrp_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_cgroup_btf_id[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_cgrp_storage_delete_proto = { + .func = bpf_cgrp_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_cgroup_btf_id[0], +}; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a6b04faed282..124fd199ce5c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1663,6 +1663,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_dynptr_write_proto; case BPF_FUNC_dynptr_data: return &bpf_dynptr_data_proto; +#ifdef CONFIG_CGROUPS + case BPF_FUNC_cgrp_storage_get: + return &bpf_cgrp_storage_get_proto; + case BPF_FUNC_cgrp_storage_delete: + return &bpf_cgrp_storage_delete_proto; +#endif default: break; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a0b4266196a8..11df90962101 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1016,7 +1016,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE && map->map_type != BPF_MAP_TYPE_INODE_STORAGE && - map->map_type != BPF_MAP_TYPE_TASK_STORAGE) + map->map_type != BPF_MAP_TYPE_TASK_STORAGE && + map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ddc1452cf023..8f849a763b79 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6350,6 +6350,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_CGRP_STORAGE: + if (func_id != BPF_FUNC_cgrp_storage_get && + func_id != BPF_FUNC_cgrp_storage_delete) + goto error; + break; case BPF_MAP_TYPE_BLOOM_FILTER: if (func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_map_push_elem) @@ -6462,6 +6467,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) goto error; break; + case BPF_FUNC_cgrp_storage_get: + case BPF_FUNC_cgrp_storage_delete: + if 
(map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) + goto error; + break; default: break; } @@ -14139,7 +14149,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (insn->imm == BPF_FUNC_task_storage_get || insn->imm == BPF_FUNC_sk_storage_get || - insn->imm == BPF_FUNC_inode_storage_get) { + insn->imm == BPF_FUNC_inode_storage_get || + insn->imm == BPF_FUNC_cgrp_storage_get) { if (env->prog->aux->sleepable) insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); else diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 764bdd5fd8d1..12001928511b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5245,6 +5245,7 @@ static void css_free_rwork_fn(struct work_struct *work) atomic_dec(&cgrp->root->nr_cgrps); cgroup1_pidlist_destroy_all(cgrp); cancel_work_sync(&cgrp->release_agent_work); + bpf_cgrp_storage_free(cgrp); if (cgroup_parent(cgrp)) { /* diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index eed1bd952c3a..ce0228c72a93 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1455,6 +1455,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_cgroup_id_proto; case BPF_FUNC_get_current_ancestor_cgroup_id: return &bpf_get_current_ancestor_cgroup_id_proto; + case BPF_FUNC_cgrp_storage_get: + return &bpf_cgrp_storage_get_proto; + case BPF_FUNC_cgrp_storage_delete: + return &bpf_cgrp_storage_delete_proto; #endif case BPF_FUNC_send_signal: return &bpf_send_signal_proto; diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c0e6690be82a..fdb0aff8cb5a 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -685,6 +685,7 @@ class PrinterHelpers(Printer): 'struct udp6_sock', 'struct unix_sock', 'struct task_struct', + 'struct cgroup', 'struct __sk_buff', 'struct sk_msg_md', @@ -742,6 +743,7 @@ class PrinterHelpers(Printer): 'struct udp6_sock', 'struct unix_sock', 'struct task_struct', + 'struct cgroup', 'struct path', 'struct btf_ptr', 'struct inode', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 17f61338f8f8..94659f6b3395 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -5435,6 +5443,44 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. 
From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5647,6 +5693,8 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ FN(ktime_get_tai_ns, 208, ##ctx) \ FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- cgit v1.2.3-58-ga151 From 4fe64af23c12ae9f2c1f0ca0d556d9cb8f088dfb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:28:56 -0700 Subject: libbpf: Support new cgroup local storage Add support for new cgroup local storage. 
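With libbpf aware of the new type, availability can be feature-probed from userspace (a small sketch using the existing libbpf_probe_bpf_map_type() API):

  #include <stdio.h>
  #include <bpf/libbpf.h>

  int main(void)
  {
  	/* 1 if the kernel supports the map type, 0 if not,
  	 * negative error on probe failure.
  	 */
  	int ret = libbpf_probe_bpf_map_type(BPF_MAP_TYPE_CGRP_STORAGE, NULL);

  	printf("cgrp_storage: %s\n", ret == 1 ? "supported" : "not supported");
  	return ret < 0;
  }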
Acked-by: David Vernet Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042856.673989-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 1 + tools/lib/bpf/libbpf_probes.c | 1 + 2 files changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 027fd9565c16..5d7819edf074 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -164,6 +164,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", + [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", }; static const char * const prog_type_name[] = { diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..bdb83d467f9a 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -221,6 +221,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; -- cgit v1.2.3-58-ga151 From f7f0f1657d9552e8667bc4758cbf41909bcdf7e2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:29:01 -0700 Subject: bpftool: Support new cgroup local storage Add support for new cgroup local storage Acked-by: Quentin Monnet Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042901.674177-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/map.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 7f3b67a8b48f..11250c4734fe 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -55,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** | **user_ringbuf** } +| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 3087ced658ad..f941ac5c7b73 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1458,7 +1458,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter | user_ringbuf }\n" + " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", -- cgit v1.2.3-58-ga151 From fd4ca6c1facfdc370d5f0b798106d07433e33aec Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:29:06 -0700 Subject: selftests/bpf: Fix test test_libbpf_str/bpf_map_type_str Previous bpf patch made a change to uapi bpf.h like @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + BPF_MAP_TYPE_CGROUP_STORAGE = 
BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, where BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value. This will cause the selftest test_libbpf_str/bpf_map_type_str to fail. This patch fixes the issue by skipping the check for BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED in the test. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042906.674830-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/libbpf_str.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 93e9cddaadcf..efb8bd43653c 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -139,6 +139,14 @@ static void test_libbpf_bpf_map_type_str(void) snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); uppercase(buf); + /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED + * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value + * (map_type). For this enum value, libbpf_bpf_map_type_str() picks + * BPF_MAP_TYPE_CGROUP_STORAGE. + */ + if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0) + continue; + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); } -- cgit v1.2.3-58-ga151 From 12bb6ca4e2fa14ecb007228160d1e5f8c5b92d01 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:29:11 -0700 Subject: selftests/bpf: Add selftests for new cgroup local storage Add four tests for the new cgroup local storage: (1) testing bpf program helpers and user space map APIs, (2) testing that recursive fentry triggering won't deadlock, (3) testing progs attached to cgroups, and (4) a negative test where the bpf_cgrp_storage_get() helper key is not a cgroup btf id. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042911.675546-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cgrp_local_storage.c | 171 +++++++++++++++++++++ .../selftests/bpf/progs/cgrp_ls_attach_cgroup.c | 101 ++++++++++++ .../testing/selftests/bpf/progs/cgrp_ls_negative.c | 26 ++++ .../selftests/bpf/progs/cgrp_ls_recursion.c | 70 +++++++++ tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c | 88 +++++++++++ 5 files changed, 456 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c create mode 100644 tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c create mode 100644 tools/testing/selftests/bpf/progs/cgrp_ls_negative.c create mode 100644 tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c create mode 100644 tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c new file mode 100644 index 000000000000..1c30412ba132 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc.
and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include "cgrp_ls_tp_btf.skel.h" +#include "cgrp_ls_recursion.skel.h" +#include "cgrp_ls_attach_cgroup.skel.h" +#include "cgrp_ls_negative.skel.h" +#include "network_helpers.h" + +struct socket_cookie { + __u64 cookie_key; + __u32 cookie_value; +}; + +static void test_tp_btf(int cgroup_fd) +{ + struct cgrp_ls_tp_btf *skel; + long val1 = 1, val2 = 0; + int err; + + skel = cgrp_ls_tp_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + /* populate a value in map_b */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); + if (!ASSERT_OK(err, "map_update_elem")) + goto out; + + /* check value */ + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val2); + if (!ASSERT_OK(err, "map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val2, 1, "map_lookup_elem, invalid val")) + goto out; + + /* delete value */ + err = bpf_map_delete_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd); + if (!ASSERT_OK(err, "map_delete_elem")) + goto out; + + skel->bss->target_pid = syscall(SYS_gettid); + + err = cgrp_ls_tp_btf__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_gettid); + syscall(SYS_gettid); + + skel->bss->target_pid = 0; + + /* 3x syscalls: 1x attach and 2x gettid */ + ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); + ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt"); + ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); +out: + cgrp_ls_tp_btf__destroy(skel); +} + +static void test_attach_cgroup(int cgroup_fd) +{ + int server_fd = 0, client_fd = 0, err = 0; + socklen_t addr_len = sizeof(struct sockaddr_in6); + struct cgrp_ls_attach_cgroup *skel; + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + struct socket_cookie val; + + skel = cgrp_ls_attach_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->links.set_cookie = bpf_program__attach_cgroup( + skel->progs.set_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) + goto out; + + skel->links.update_cookie_sockops = bpf_program__attach_cgroup( + skel->progs.update_cookie_sockops, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach")) + goto out; + + skel->links.update_cookie_tracing = bpf_program__attach( + skel->progs.update_cookie_tracing); + if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach")) + goto out; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto close_server_fd; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies), + &cgroup_fd, &val); + if (!ASSERT_OK(err, "map_lookup(socket_cookies)")) + goto close_client_fd; + + err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len); + if (!ASSERT_OK(err, "getsockname")) + goto close_client_fd; + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value"); + +close_client_fd: + close(client_fd); +close_server_fd: + close(server_fd); +out: + cgrp_ls_attach_cgroup__destroy(skel); +} + +static void test_recursion(int cgroup_fd) +{ + struct cgrp_ls_recursion *skel; + int err; + + skel = cgrp_ls_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = cgrp_ls_recursion__attach(skel); + if
(!ASSERT_OK(err, "skel_attach")) + goto out; + + /* trigger sys_enter, make sure it does not cause deadlock */ + syscall(SYS_gettid); + +out: + cgrp_ls_recursion__destroy(skel); +} + +static void test_negative(void) +{ + struct cgrp_ls_negative *skel; + + skel = cgrp_ls_negative__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "skel_open_and_load")) { + cgrp_ls_negative__destroy(skel); + return; + } +} + +void test_cgrp_local_storage(void) +{ + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/cgrp_local_storage"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) + return; + + if (test__start_subtest("tp_btf")) + test_tp_btf(cgroup_fd); + if (test__start_subtest("attach_cgroup")) + test_attach_cgroup(cgroup_fd); + if (test__start_subtest("recursion")) + test_recursion(cgroup_fd); + if (test__start_subtest("negative")) + test_negative(); + + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c new file mode 100644 index 000000000000..6652d18465b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +struct socket_cookie { + __u64 cookie_key; + __u64 cookie_value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct socket_cookie); +} socket_cookies SEC(".maps"); + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!p) + return 1; + + p->cookie_value = 0xF; + p->cookie_key = bpf_get_socket_cookie(ctx); + return 1; +} + +SEC("sockops") +int update_cookie_sockops(struct bpf_sock_ops *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 1; + + if (p->cookie_key != bpf_get_socket_cookie(ctx)) + return 1; + + p->cookie_value |= (ctx->local_port << 8); + return 1; +} + +SEC("fexit/inet_stream_connect") +int BPF_PROG(update_cookie_tracing, struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + + if (uaddr->sa_family != AF_INET6) + return 0; + + p = bpf_cgrp_storage_get(&socket_cookies, sock->sk->sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 0; + + if (p->cookie_key != bpf_get_socket_cookie(sock->sk)) + return 0; + + p->cookie_value |= 0xF0; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c new file mode 100644 index 000000000000..d41f90e2ab64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c @@ -0,0
+1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + (void)bpf_cgrp_storage_get(&map_a, (struct cgroup *)task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c new file mode 100644 index 000000000000..a043d8fefdac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +SEC("fentry/bpf_local_storage_lookup") +int BPF_PROG(on_lookup) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + return 0; +} + +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 200; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 100; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c new file mode 100644 index 000000000000..9ebb8e2fe541 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +#define MAGIC_VALUE 0xabcd1234 + +pid_t target_pid = 0; +int mismatch_cnt = 0; +int enter_cnt = 0; +int exit_cnt = 0; + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + int err; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* populate value 0 */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + /* delete value 0 */ + err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + if (err) + return 0; + + /* value is not available */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + if (ptr) + return 0; + + /* re-populate the value */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + __sync_fetch_and_add(&enter_cnt, 1); + *ptr = MAGIC_VALUE + enter_cnt; + + return 0; +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + __sync_fetch_and_add(&exit_cnt, 1); + if (*ptr != MAGIC_VALUE + exit_cnt) + __sync_fetch_and_add(&mismatch_cnt, 1); + return 0; +} -- cgit v1.2.3-58-ga151 From 0a1b69d1c736130dfe41d9c999d1be17b2794cda Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:29:17 -0700 Subject: selftests/bpf: Add test cgrp_local_storage to DENYLIST.s390x The cgrp_local_storage test has some programs utilizing trampolines. Arch s390x does not support trampolines, so add the test to the corresponding DENYLIST file.
Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042917.675685-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 520f12229b98..be4e3d47ea3e 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -10,6 +10,7 @@ bpf_nf # JIT does not support calling kernel f bpf_tcp_ca # JIT does not support calling kernel function (kfunc) cb_refs # expected error message unexpected error: -524 (trampoline) cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) +cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) -- cgit v1.2.3-58-ga151 From 5ed88f81511ce695692f0510ab3ca17eee68eff6 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Tue, 25 Oct 2022 23:15:46 +0000 Subject: selftests/bpf: Panic on hard/soft lockup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running tests, we should probably accept any help we can get when it comes to detecting issues early or making them more debuggable. We have seen a few cases where a test_progs_noalu32 run, for example, encountered a soft lockup and stopped making progress. It was only interrupted once we hit the overall test timeout [0]. We cannot and do not want to necessarily rely on test timeouts, because those rely on infrastructure provided by the environment we run in (and which is not present in tools/testing/selftests/bpf/vmtest.sh, for example). To that end, let's enable panics on soft as well as hard lockups to fail fast should we encounter one. That's happening in the configuration intended to be used for selftests (including when using vmtest.sh or when running in BPF CI).
[0] https://github.com/kernel-patches/bpf/runs/7844499997 Signed-off-by: Daniel Müller Link: https://lore.kernel.org/r/20221025231546.811766-1-deso@posteo.net Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 2 ++ tools/testing/selftests/bpf/config.x86_64 | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 9213565c0311..7a99a6728169 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,4 +1,6 @@ CONFIG_BLK_DEV_LOOP=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 21ce5ea4304e..dd97d61d325c 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_KPROBE_OVERRIDE=y -- cgit v1.2.3-58-ga151 From d96d4276eaeb09b07e0949d432622691ea5c96f5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 26 Oct 2022 09:30:14 -0700 Subject: selftests/bpf: Fix bpftool synctypes checking failure kernel-patches/bpf failed with error: Running bpftool checks... Comparing /data/users/ast/net-next/tools/include/uapi/linux/bpf.h (bpf_map_type) and /data/users/ast/net-next/tools/bpf/bpftool/map.c (do_help() TYPE): {'cgroup_storage_deprecated', 'cgroup_storage'} Comparing /data/users/ast/net-next/tools/include/uapi/linux/bpf.h (bpf_map_type) and /data/users/ast/net-next/tools/bpf/bpftool/Documentation/bpftool-map.rst (TYPE): {'cgroup_storage_deprecated', 'cgroup_storage'} The script selftests/bpf/test_bpftool_synctypes.py runs the checks shown above. The failure was introduced by commit c4bcfb38a95e ("bpf: Implement cgroup storage available to non-cgroup-attached bpf progs"). The commit introduced BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, which has the same enum value as BPF_MAP_TYPE_CGROUP_STORAGE. In test_bpftool_synctypes.py, one test compares the uapi bpf.h map types with the maps supported by bpftool. The tool picks 'cgroup_storage_deprecated' from bpf.h while the bpftool-supported map is displayed as 'cgroup_storage'. The test failure can be fixed by explicitly replacing 'cgroup_storage_deprecated' with 'cgroup_storage' in uapi bpf.h map types. Signed-off-by: Yonghong Song Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20221026163014.470732-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_bpftool_synctypes.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index a6410bebe603..9fe4c9336c6f 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -501,6 +501,14 @@ def main(): source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE + # share the same enum value and source_map_types picks + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED/cgroup_storage_deprecated.
+ # Replace 'cgroup_storage_deprecated' with 'cgroup_storage' + # so it aligns with what `bpftool map help` shows. + source_map_types.remove('cgroup_storage_deprecated') + source_map_types.add('cgroup_storage') + help_map_types = map_info.get_map_help() help_map_options = map_info.get_options() map_info.close() -- cgit v1.2.3-58-ga151 From 96f341a4751d1939a303c5c526b2ca79ebda075f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 26 Oct 2022 09:16:45 +0100 Subject: bpftool: Fix spelling mistake "disasembler" -> "disassembler" There is a spelling mistake in an error message. Fix it. Signed-off-by: Colin Ian King Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20221026081645.3186878-1-colin.i.king@gmail.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/jit_disasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 58a5017034a2..7b8d9ec89ebd 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -223,7 +223,7 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, memset(tpath, 0, sizeof(tpath)); if (get_exec_path(tpath, sizeof(tpath))) { - p_err("failed to create disasembler (get_exec_path)"); + p_err("failed to create disassembler (get_exec_path)"); return -1; } -- cgit v1.2.3-58-ga151 From 95d9a3dab109f2806980d55634972120824a5a5a Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Mon, 24 Oct 2022 11:16:03 +0000 Subject: selftests: tc-testing: Add matchJSON to tdc This allows the use of a matchJSON field in tests to match against JSON output from the command under test, if that command outputs JSON. You specify what you want to match against as a JSON array or object in the test's matchJSON field. You can leave out any fields you don't want to match against that are present in the output and they will be skipped. An example matchJSON value would look like this: "matchJSON": [ { "Value": { "neighIP": { "family": 4, "addr": "AQIDBA==", "width": 32 }, "nsflags": 142, "ncflags": 0, "LLADDR": "ESIzRFVm" } } ] The real output from the command under test might have some extra fields that we don't care about for matching, and since we didn't include them in our matchJSON value, no attempt will be made to match those fields. If everything we included above has the same values as the real command output, the test will pass. The matchJSON field's type must be the same as the command output's type, otherwise the test will fail. So if the command outputs an array, then the value of matchJSON must also be an array. If matchJSON is an array, it must not contain more elements than the command output's array, otherwise the test will fail.
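For illustration, a complete (hypothetical) test case using matchJSON might look like the following skeleton; the id, name, category, commands and elided fields are placeholders invented for this example, and the matchJSON value simply reuses fields from the example above:

    {
        "id": "XXXX",
        "name": "Example test that matches against JSON output",
        "category": [ ... ],
        "setup": [ ... ],
        "cmdUnderTest": "...",
        "expExitCode": "0",
        "verifyCmd": "the command whose JSON output is checked",
        "matchJSON": [
            {
                "Value": {
                    "nsflags": 142,
                    "LLADDR": "ESIzRFVm"
                }
            }
        ],
        "teardown": [ ... ]
    }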
Signed-off-by: Jeremy Carter Signed-off-by: Victor Nogueira Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20221024111603.2185410-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 125 ++++++++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee22e3447ec7..7bd94f8e490a 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -246,6 +246,110 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): stage, output, '"{}" did not complete successfully'.format(prefix)) +def verify_by_json(procout, res, tidx, args, pm): + try: + outputJSON = json.loads(procout) + except json.JSONDecodeError: + res.set_result(ResultState.fail) + res.set_failmsg('Cannot decode verify command\'s output. Is it JSON?') + return res + + matchJSON = json.loads(json.dumps(tidx['matchJSON'])) + + if type(outputJSON) != type(matchJSON): + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {} ' + failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if len(matchJSON) > len(outputJSON): + failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}" + failmsg = failmsg.format(len(outputJSON), outputJSON, len(matchJSON), matchJSON) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + res = find_in_json(res, outputJSON, matchJSON, 0) + + return res + +def find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if res.get_result() == ResultState.fail: + return res + + if type(matchJSONVal) == list: + res = find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey) + + elif type(matchJSONVal) == dict: + res = find_in_json_dict(res, outputJSONVal, matchJSONVal) + else: + res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey) + + if res.get_result() != ResultState.fail: + res.set_result(ResultState.success) + return res + + return res + +def find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if (type(matchJSONVal) != type(outputJSONVal)): + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}' + failmsg = failmsg.format(outputJSONVal, matchJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if len(matchJSONVal) > len(outputJSONVal): + failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}" + failmsg = failmsg.format(len(outputJSONVal), outputJSONVal, len(matchJSONVal), matchJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + for matchJSONIdx, matchJSONVal in enumerate(matchJSONVal): + res = find_in_json(res, outputJSONVal[matchJSONIdx], matchJSONVal, + matchJSONKey) + return res + +def find_in_json_dict(res, outputJSONVal, matchJSONVal): + for matchJSONKey, matchJSONVal in matchJSONVal.items(): + if type(outputJSONVal) == dict: + if matchJSONKey not in outputJSONVal: + failmsg = 'Key not found in json output: {}: {}\nMatching against output: {}' + failmsg = 
failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + else: + failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}' + failmsg = failmsg.format(type(outputJSONVal).__name__, type(matchJSONVal).__name__) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + if type(outputJSONVal) == dict and (type(outputJSONVal[matchJSONKey]) == dict or + type(outputJSONVal[matchJSONKey]) == list): + if len(matchJSONVal) > 0: + res = find_in_json(res, outputJSONVal[matchJSONKey], matchJSONVal, matchJSONKey) + # handling corner case where matchJSONVal == [] or matchJSONVal == {} + else: + res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey) + else: + res = find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey) + return res + +def find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey=None): + if matchJSONKey in outputJSONVal: + if matchJSONVal != outputJSONVal[matchJSONKey]: + failmsg = 'Value doesn\'t match: {}: {} != {}\nMatching against output: {}' + failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal[matchJSONKey], outputJSONVal) + res.set_result(ResultState.fail) + res.set_failmsg(failmsg) + return res + + return res + def run_one_test(pm, args, index, tidx): global NAMES result = True @@ -292,16 +396,22 @@ def run_one_test(pm, args, index, tidx): else: if args.verbose > 0: print('-----> verify stage') - match_pattern = re.compile( - str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"]) if procout: - match_index = re.findall(match_pattern, procout) - if len(match_index) != int(tidx["matchCount"]): - res.set_result(ResultState.fail) - res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + if 'matchJSON' in tidx: + verify_by_json(procout, res, tidx, args, pm) + elif 'matchPattern' in tidx: + match_pattern = re.compile( + str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) else: - res.set_result(ResultState.success) + res.set_result(ResultState.fail) + res.set_failmsg('Must specify a match option: matchJSON or matchPattern\n{}'.format(procout)) elif int(tidx["matchCount"]) != 0: res.set_result(ResultState.fail) res.set_failmsg('No output generated by verify command.') @@ -365,6 +475,7 @@ def test_runner(pm, args, filtered_tests): res.set_result(ResultState.skip) res.set_errormsg(errmsg) tsr.add_resultdata(res) + index += 1 continue try: badtest = tidx # in case it goes bad -- cgit v1.2.3-58-ga151 From 4a331d3469963b5db37d462963397a8fe52aace0 Mon Sep 17 00:00:00 2001 From: "Hans J. Schultz" Date: Tue, 1 Nov 2022 21:39:22 +0200 Subject: selftests: forwarding: Add MAC Authentication Bypass (MAB) test cases Add four test cases to verify MAB functionality: * Verify that a locked FDB entry can be generated by the bridge, preventing a host from communicating via the bridge. Test that user space can clear the "locked" flag by replacing the entry, thereby authenticating the host and allowing it to communicate via the bridge. * Test that an entry cannot roam to a locked port, but that it can roam to an unlocked port.
* Test that MAB can only be enabled on a port that is both locked and has learning enabled. * Test that locked FDB entries are flushed from a port when MAB is disabled. Signed-off-by: Hans J. Schultz Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_locked_port.sh | 155 ++++++++++++++++++++- tools/testing/selftests/net/forwarding/lib.sh | 8 ++ 2 files changed, 162 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh index 5b02b6b60ce7..dc92d32464f6 100755 --- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -1,7 +1,16 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="locked_port_ipv4 locked_port_ipv6 locked_port_vlan" +ALL_TESTS=" + locked_port_ipv4 + locked_port_ipv6 + locked_port_vlan + locked_port_mab + locked_port_mab_roam + locked_port_mab_config + locked_port_mab_flush +" + NUM_NETIFS=4 CHECK_TC="no" source lib.sh @@ -166,6 +175,150 @@ locked_port_ipv6() log_test "Locked port ipv6" } +locked_port_mab() +{ + RET=0 + check_port_mab_support || return 0 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work before locking port" + + bridge link set dev $swp1 learning on locked on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on a locked port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 &> /dev/null + check_fail $? "FDB entry created before enabling MAB" + + bridge link set dev $swp1 learning on locked on mab on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on MAB enabled port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "Locked FDB entry not created" + + bridge fdb replace `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after replacing FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_fail $? "FDB entry marked as locked after replacement" + + bridge fdb del `mac_get $h1` dev $swp1 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB" +} + +# Check that entries cannot roam to a locked port, but that entries can roam +# to an unlocked port. +locked_port_mab_roam() +{ + local mac=a0:b0:c0:c0:b0:a0 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "No locked entry on first injection" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp2" + check_err $? "Entry did not roam to an unlocked port" + + bridge fdb get $mac br br0 vlan 1 | grep -q "locked" + check_fail $? "Entry roamed with locked flag on" + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp1" + check_fail $? "Entry roamed back to locked port" + + bridge fdb del $mac vlan 1 dev $swp2 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB roam" +} + +# Check that MAB can only be enabled on a port that is both locked and has +# learning enabled. 
+locked_port_mab_config() +{ + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked off mab on &> /dev/null + check_fail $? "MAB enabled while port is unlocked" + + bridge link set dev $swp1 learning off locked on mab on &> /dev/null + check_fail $? "MAB enabled while port has learning disabled" + + bridge link set dev $swp1 learning on locked on mab on + check_err $? "Failed to enable MAB when port is locked and has learning enabled" + + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB configuration" +} + +# Check that locked FDB entries are flushed from a port when MAB is disabled. +locked_port_mab_flush() +{ + local locked_mac1=00:01:02:03:04:05 + local unlocked_mac1=00:01:02:03:04:06 + local locked_mac2=00:01:02:03:04:07 + local unlocked_mac2=00:01:02:03:04:08 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + bridge link set dev $swp2 learning on locked on mab on + + # Create regular and locked FDB entries on each port. + bridge fdb add $unlocked_mac1 dev $swp1 vlan 1 master static + bridge fdb add $unlocked_mac2 dev $swp2 vlan 1 master static + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $locked_mac1 -b rand + bridge fdb get $locked_mac1 br br0 vlan 1 | grep "dev $swp1" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on first port" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $locked_mac2 -b rand + bridge fdb get $locked_mac2 br br0 vlan 1 | grep "dev $swp2" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on second port" + + # Disable MAB on the first port and check that only the first locked + # FDB entry was flushed. + bridge link set dev $swp1 mab off + + bridge fdb get $unlocked_mac1 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on first port was flushed after disabling MAB" + + bridge fdb get $unlocked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on second port was flushed after disabling MAB" + + bridge fdb get $locked_mac1 br br0 vlan 1 &> /dev/null + check_fail $? "Locked FDB entry on first port was not flushed after disabling MAB" + + bridge fdb get $locked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Locked FDB entry on second port was flushed after disabling MAB" + + bridge fdb del $unlocked_mac2 dev $swp2 vlan 1 master static + bridge fdb del $unlocked_mac1 dev $swp1 vlan 1 master static + + bridge link set dev $swp2 learning on locked off mab off + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB FDB flush" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 3ffb9d6c0950..1c4f866de7d7 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -137,6 +137,14 @@ check_locked_port_support() fi } +check_port_mab_support() +{ + if ! bridge -d link show | grep -q "mab"; then + echo "SKIP: iproute2 too old; MacAuth feature not supported." 
+ return $ksft_skip + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit $ksft_skip -- cgit v1.2.3-58-ga151 From 9b5e3536c898b23143748b8cc01c64f868ec7078 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:52:55 -0700 Subject: selftests/bpf: add veristat replay mode Replay mode allows parsing a previously stored CSV file with verification results and presenting it in the desired output format (presumably a human-readable table, but CSV-to-CSV conversion is supported as well). While doing that, it's possible to use veristat's sorting rules, specify a subset of columns, and filter by file and program name. In subsequent patches veristat's filtering capabilities will grow, making replay mode even more useful in practice for post-processing results. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 126 +++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 973cbf6af323..7e1432c06e0c 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -67,6 +67,7 @@ static struct env { int log_level; enum resfmt out_fmt; bool comparison_mode; + bool replay_mode; struct verif_stats *prog_stats; int prog_stat_cnt; @@ -115,6 +116,7 @@ static const struct argp_option opts[] = { { "sort", 's', "SPEC", 0, "Specify sort order" }, { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." }, { "compare", 'C', NULL, 0, "Comparison mode" }, + { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)."
}, {}, }; @@ -169,6 +171,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'C': env.comparison_mode = true; break; + case 'R': + env.replay_mode = true; + break; case 'f': if (arg[0] == '@') err = append_filter_file(arg + 1); @@ -841,42 +846,6 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last } } -static int handle_verif_mode(void) -{ - int i, err; - - if (env.filename_cnt == 0) { - fprintf(stderr, "Please provide path to BPF object file!\n"); - argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); - return -EINVAL; - } - - for (i = 0; i < env.filename_cnt; i++) { - err = process_obj(env.filenames[i]); - if (err) { - fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; - } - } - - qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); - - if (env.out_fmt == RESFMT_TABLE) { - /* calculate column widths */ - output_headers(RESFMT_TABLE_CALCLEN); - for (i = 0; i < env.prog_stat_cnt; i++) - output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false); - } - - /* actually output the table */ - output_headers(env.out_fmt); - for (i = 0; i < env.prog_stat_cnt; i++) { - output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1); - } - - return 0; -} - static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st) { switch (id) { @@ -1206,7 +1175,7 @@ static int handle_comparison_mode(void) int err, i, j; if (env.filename_cnt != 2) { - fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n"); + fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); return -EINVAL; } @@ -1307,6 +1276,79 @@ one_more_time: return 0; } +static void output_prog_stats(void) +{ + const struct verif_stats *stats; + int i, last_stat_idx = 0; + + if (env.out_fmt == RESFMT_TABLE) { + /* calculate column widths */ + output_headers(RESFMT_TABLE_CALCLEN); + for (i = 0; i < env.prog_stat_cnt; i++) { + stats = &env.prog_stats[i]; + output_stats(stats, RESFMT_TABLE_CALCLEN, false); + last_stat_idx = i; + } + } + + /* actually output the table */ + output_headers(env.out_fmt); + for (i = 0; i < env.prog_stat_cnt; i++) { + stats = &env.prog_stats[i]; + output_stats(stats, env.out_fmt, i == last_stat_idx); + } +} + +static int handle_verif_mode(void) +{ + int i, err; + + if (env.filename_cnt == 0) { + fprintf(stderr, "Please provide path to BPF object file!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + for (i = 0; i < env.filename_cnt; i++) { + err = process_obj(env.filenames[i]); + if (err) { + fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); + return err; + } + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + output_prog_stats(); + + return 0; +} + +static int handle_replay_mode(void) +{ + struct stat_specs specs = {}; + int err; + + if (env.filename_cnt != 1) { + fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return -EINVAL; + } + + err = parse_stats_csv(env.filenames[0], &specs, + &env.prog_stats, &env.prog_stat_cnt); + if (err) { + fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err); + return err; + } + + qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); + + output_prog_stats(); + + return 0; +} + int main(int argc, char 
**argv) { int err = 0, i; @@ -1315,7 +1357,7 @@ int main(int argc, char **argv) return 1; if (env.verbose && env.quiet) { - fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n"); + fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n"); argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); return 1; } @@ -1327,8 +1369,16 @@ int main(int argc, char **argv) if (env.sort_spec.spec_cnt == 0) env.sort_spec = default_sort_spec; + if (env.comparison_mode && env.replay_mode) { + fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n"); + argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat"); + return 1; + } + if (env.comparison_mode) err = handle_comparison_mode(); + else if (env.replay_mode) + err = handle_replay_mode(); else err = handle_verif_mode(); -- cgit v1.2.3-58-ga151 From 62d2c08bb91cc3fc26319c571000cacac0312426 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:52:56 -0700 Subject: selftests/bpf: shorten "Total insns/states" column names in veristat In comparison mode the "Total " part is pretty useless, but takes a considerable amount of horizontal space. Drop the "Total " parts. Also make sure that table headers for numerical columns are aligned in the same fashion as integer values in those columns. This looks better and is now more obvious with shorter "Insns" and "States" column headers. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 7e1432c06e0c..d553f38a6cee 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -405,13 +405,14 @@ static struct stat_def { const char *header; const char *names[4]; bool asc_by_default; + bool left_aligned; } stat_defs[] = { - [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ }, - [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ }, - [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ }, + [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ }, + [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ }, + [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ }, [DURATION] = { "Duration (us)", {"duration", "dur"}, }, - [TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, }, - [TOTAL_STATES] = { "Total states", {"total_states", "states"}, }, + [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, }, + [TOTAL_STATES] = { "States", {"total_states", "states"}, }, [PEAK_STATES] = { "Peak states", {"peak_states"}, }, [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, @@ -743,6 +744,7 @@ static void output_header_underlines(void) static void output_headers(enum resfmt fmt) { + const char *fmt_str; int i, len; for (i = 0; i < env.output_spec.spec_cnt; i++) { @@ -756,7 +758,8 @@ static void output_headers(enum resfmt fmt) *max_len = len; break; case RESFMT_TABLE: - printf("%s%-*s", i == 0 ? 
"" : COLUMN_SEP, *max_len, stat_defs[id].header); + fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s"; + printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header); if (i == env.output_spec.spec_cnt - 1) printf("\n"); break; -- cgit v1.2.3-58-ga151 From 10b1b3f3e56a6a3586356bf8cc77d7753ba8fcc9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:52:57 -0700 Subject: selftests/bpf: consolidate and improve file/prog filtering in veristat Slightly change rules of specifying file/prog glob filters. In practice it's quite often inconvenient to do `*/` if that program glob is unique enough and won't accidentally match any file names. This patch changes the rules so that `-f ` will apply specified glob to both file and program names. User still has all the control by doing '*/' or '' and ' Link: https://lore.kernel.org/r/20221103055304.2904589-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 127 +++++++++++++++++---------------- 1 file changed, 65 insertions(+), 62 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index d553f38a6cee..f6f6a2490489 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -55,6 +55,7 @@ enum resfmt { }; struct filter { + char *any_glob; char *file_glob; char *prog_glob; }; @@ -231,28 +232,6 @@ static bool glob_matches(const char *str, const char *pat) return !*str && !*pat; } -static bool should_process_file(const char *filename) -{ - int i; - - if (env.deny_filter_cnt > 0) { - for (i = 0; i < env.deny_filter_cnt; i++) { - if (glob_matches(filename, env.deny_filters[i].file_glob)) - return false; - } - } - - if (env.allow_filter_cnt == 0) - return true; - - for (i = 0; i < env.allow_filter_cnt; i++) { - if (glob_matches(filename, env.allow_filters[i].file_glob)) - return true; - } - - return false; -} - static bool is_bpf_obj_file(const char *path) { Elf64_Ehdr *ehdr; int fd, err = -EINVAL; @@ -285,38 +264,46 @@ cleanup: return err == 0; } -static bool should_process_prog(const char *path, const char *prog_name) +static bool should_process_file_prog(const char *filename, const char *prog_name) { - const char *filename = basename(path); - int i; + struct filter *f; + int i, allow_cnt = 0; - if (env.deny_filter_cnt > 0) { - for (i = 0; i < env.deny_filter_cnt; i++) { - if (glob_matches(filename, env.deny_filters[i].file_glob)) - return false; - if (!env.deny_filters[i].prog_glob) - continue; - if (glob_matches(prog_name, env.deny_filters[i].prog_glob)) - return false; - } - } + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; - if (env.allow_filter_cnt == 0) - return true; + if (f->any_glob && glob_matches(filename, f->any_glob)) + return false; + if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob)) + return false; + if (f->file_glob && glob_matches(filename, f->file_glob)) + return false; + if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob)) + return false; + } for (i = 0; i < env.allow_filter_cnt; i++) { - if (!glob_matches(filename, env.allow_filters[i].file_glob)) - continue; - /* if filter specifies only filename glob part, it implicitly - * allows all progs within that file - */ - if (!env.allow_filters[i].prog_glob) - return true; - if (glob_matches(prog_name, env.allow_filters[i].prog_glob)) + f = &env.allow_filters[i]; + allow_cnt++; + + if (f->any_glob) { + if (glob_matches(filename, 
f->any_glob)) + return true; + if (prog_name && glob_matches(prog_name, f->any_glob)) + return true; + } else { + if (f->file_glob && !glob_matches(filename, f->file_glob)) + continue; + if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob)) + continue; return true; + } } - return false; + /* if there are no file/prog name allow filters, allow all progs, + * unless they are denied earlier explicitly + */ + return allow_cnt == 0; } static int append_filter(struct filter **filters, int *cnt, const char *str) @@ -331,26 +318,40 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) *filters = tmp; f = &(*filters)[*cnt]; - f->file_glob = f->prog_glob = NULL; - - /* filter can be specified either as "<glob>" or "<file-glob>/<prog-glob>" */ + memset(f, 0, sizeof(*f)); + + /* File/prog filter can be specified either as '<glob>' or + * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to + * both file and program names. This seems to be way more useful in + * practice. If user needs full control, they can use '/<prog-glob>' + * form to glob just program name, or '<file-glob>/' to glob only file + * name. But usually common <glob> seems to be the most useful and + * ergonomic way. + */ p = strchr(str, '/'); if (!p) { - f->file_glob = strdup(str); - if (!f->file_glob) + f->any_glob = strdup(str); + if (!f->any_glob) return -ENOMEM; } else { - f->file_glob = strndup(str, p - str); - f->prog_glob = strdup(p + 1); - if (!f->file_glob || !f->prog_glob) { - free(f->file_glob); - free(f->prog_glob); - f->file_glob = f->prog_glob = NULL; - return -ENOMEM; + if (str != p) { + /* non-empty file glob */ + f->file_glob = strndup(str, p - str); + if (!f->file_glob) + return -ENOMEM; + } + if (strlen(p + 1) > 0) { + /* non-empty prog glob */ + f->prog_glob = strdup(p + 1); + if (!f->prog_glob) { + free(f->file_glob); + f->file_glob = NULL; + return -ENOMEM; + } } } - *cnt = *cnt + 1; + *cnt += 1; return 0; } @@ -546,7 +547,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf int err = 0; void *tmp; - if (!should_process_prog(filename, bpf_program__name(prog))) { + if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) { env.progs_skipped++; return 0; } @@ -602,7 +603,7 @@ static int process_obj(const char *filename) LIBBPF_OPTS(bpf_object_open_opts, opts); int err = 0, prog_cnt = 0; - if (!should_process_file(basename(filename))) { + if (!should_process_file_prog(basename(filename), NULL)) { if (env.verbose) printf("Skipping '%s' due to filters...\n", filename); env.files_skipped++; @@ -980,7 +981,7 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs, * parsed entire line; if row should be ignored we pretend we * never parsed it */ - if (!should_process_prog(st->file_name, st->prog_name)) { + if (!should_process_file_prog(st->file_name, st->prog_name)) { free(st->file_name); free(st->prog_name); *stat_cntp -= 1; @@ -1391,11 +1392,13 @@ int main(int argc, char **argv) free(env.filenames[i]); free(env.filenames); for (i = 0; i < env.allow_filter_cnt; i++) { + free(env.allow_filters[i].any_glob); free(env.allow_filters[i].file_glob); free(env.allow_filters[i].prog_glob); } free(env.allow_filters); for (i = 0; i < env.deny_filter_cnt; i++) { + free(env.deny_filters[i].any_glob); free(env.deny_filters[i].file_glob); free(env.deny_filters[i].prog_glob); } -- cgit v1.2.3-58-ga151 From b9670b904a59808ef4222179a255978384bcc119 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:52:58 -0700 Subject: selftests/bpf: ensure we always have non-ambiguous sorting
in veristat Always fall back to unique file/prog comparison if user's custom order specs are ambiguous. This ensures stable output no matter what. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index f6f6a2490489..0da3ecf6ed52 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -723,7 +723,11 @@ static int cmp_prog_stats(const void *v1, const void *v2) return cmp; } - return 0; + /* always disambiguate with file+prog, which are unique */ + cmp = strcmp(s1->file_name, s2->file_name); + if (cmp != 0) + return cmp; + return strcmp(s1->prog_name, s2->prog_name); } #define HEADER_CHAR '-' -- cgit v1.2.3-58-ga151 From d68c07e2dd91c3e8bc451ecb218b77da2635cdd4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:52:59 -0700 Subject: selftests/bpf: allow to define asc/desc ordering for sort specs in veristat Allow specifying '^' at the end of a stat name to designate that it should be sorted in ascending order. Similarly, allow any of 'v', 'V', '.', '!', or '_' suffix "symbols" to designate descending order. It's such a zoo for descending order because there is no single intuitive symbol that could be used (using 'v' looks pretty weird in practice), so a few symbols that are "downwards leaning or pointing" were chosen. Either way, it shouldn't cause any trouble in practice. This new feature allows customizing sorting order to match the user's needs. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 63 ++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 0da3ecf6ed52..56ba55156abb 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -419,32 +419,65 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; +static bool parse_stat_id(const char *name, size_t len, int *id) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { + struct stat_def *def = &stat_defs[i]; + + for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) { + + if (!def->names[j] || + strlen(def->names[j]) != len || + strncmp(def->names[j], name, len) != 0) + continue; + + *id = i; + return true; + } + } + + return false; +} + +static bool is_asc_sym(char c) +{ + return c == '^'; +} + +static bool is_desc_sym(char c) +{ + return c == 'v' || c == 'V' || c == '.' || c == '!'
|| c == '_'; +} + static int parse_stat(const char *stat_name, struct stat_specs *specs) { - int id, i; + int id; + bool has_order = false, is_asc = false; + size_t len = strlen(stat_name); if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids)); return -E2BIG; } - for (id = 0; id < ARRAY_SIZE(stat_defs); id++) { - struct stat_def *def = &stat_defs[id]; - - for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) { - if (!def->names[i] || strcmp(def->names[i], stat_name) != 0) - continue; - - specs->ids[specs->spec_cnt] = id; - specs->asc[specs->spec_cnt] = def->asc_by_default; - specs->spec_cnt++; + if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) { + has_order = true; + is_asc = is_asc_sym(stat_name[len - 1]); + len -= 1; + } - return 0; - } + if (!parse_stat_id(stat_name, len, &id)) { + fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); + return -ESRCH; } - fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); - return -ESRCH; + specs->ids[specs->spec_cnt] = id; + specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->spec_cnt++; + + return 0; } static int parse_stats(const char *stats_str, struct stat_specs *specs) -- cgit v1.2.3-58-ga151 From 1bb4ec815015609c9458d5ffeb5c8cc95b7d44d6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:53:00 -0700 Subject: selftests/bpf: support simple filtering of stats in veristat Define simple expressions to filter not just by file and program name, but also by resulting values of collected stats. Support the usual equality and inequality operators. Verdict, which is a boolean-like field, can also be filtered either as 0/1, failure/success (with f/s, fail/succ, and failure/success aliases) symbols, or as false/true (f/t). Aliases are case insensitive. Currently this filtering is honored only in verification and replay modes. Comparison mode support will be added in the next patch. Here's an example of verifying a bunch of BPF object files and emitting only results for successfully validated programs that have more than 100 total instructions processed by BPF verifier, sorted by number of instructions in ascending order: $ sudo ./veristat *.bpf.o -s insns^ -f 'insns>100' There can be many filters (both allow and deny flavors), all of them are combined.
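As another illustration of the same syntax, the verdict aliases make it easy to list only programs that failed verification; this hypothetical invocation combines the -f stat-filter form with the 'failure' alias described above:

$ sudo ./veristat *.bpf.o -f 'verdict=failure'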
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 158 ++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 56ba55156abb..37e512d233a7 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -54,10 +54,30 @@ enum resfmt { RESFMT_CSV, }; +enum filter_kind { + FILTER_NAME, + FILTER_STAT, +}; + +enum operator_kind { + OP_EQ, /* == or = */ + OP_NEQ, /* != or <> */ + OP_LT, /* < */ + OP_LE, /* <= */ + OP_GT, /* > */ + OP_GE, /* >= */ +}; + struct filter { + enum filter_kind kind; + /* FILTER_NAME */ char *any_glob; char *file_glob; char *prog_glob; + /* FILTER_STAT */ + enum operator_kind op; + int stat_id; + long value; }; static struct env { @@ -271,6 +291,8 @@ static bool should_process_file_prog(const char *filename, const char *prog_name for (i = 0; i < env.deny_filter_cnt; i++) { f = &env.deny_filters[i]; + if (f->kind != FILTER_NAME) + continue; if (f->any_glob && glob_matches(filename, f->any_glob)) return false; @@ -284,8 +306,10 @@ static bool should_process_file_prog(const char *filename, const char *prog_name for (i = 0; i < env.allow_filter_cnt; i++) { f = &env.allow_filters[i]; - allow_cnt++; + if (f->kind != FILTER_NAME) + continue; + allow_cnt++; if (f->any_glob) { if (glob_matches(filename, f->any_glob)) return true; @@ -306,11 +330,32 @@ static bool should_process_file_prog(const char *filename, const char *prog_name return allow_cnt == 0; } +static struct { + enum operator_kind op_kind; + const char *op_str; +} operators[] = { + /* Order of these definitions matters to avoid situations like '<' + * matching part of what is actually a '<>' operator. That is, + * substrings should go last. + */ + { OP_EQ, "==" }, + { OP_NEQ, "!=" }, + { OP_NEQ, "<>" }, + { OP_LE, "<=" }, + { OP_LT, "<" }, + { OP_GE, ">=" }, + { OP_GT, ">" }, + { OP_EQ, "=" }, +}; + +static bool parse_stat_id(const char *name, size_t len, int *id); + static int append_filter(struct filter **filters, int *cnt, const char *str) { struct filter *f; void *tmp; const char *p; + int i; tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters)); if (!tmp) @@ -320,6 +365,67 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f = &(*filters)[*cnt]; memset(f, 0, sizeof(*f)); + /* First, let's check if it's a stats filter of the following form: + * <stat><op><value>, where: + * - <stat> is one of supported numerical stats (verdict is also + * considered numerical, failure == 0, success == 1); + * - <op> is comparison operator (see `operators` definitions); + * - <value> is an integer (or failure/success, or false/true as + * special aliases for 0 and 1, respectively). + * If the form doesn't match what user provided, we assume file/prog + * glob filter.
+ */ + for (i = 0; i < ARRAY_SIZE(operators); i++) { + int id; + long val; + const char *end = str; + const char *op_str; + + op_str = operators[i].op_str; + p = strstr(str, op_str); + if (!p) + continue; + + if (!parse_stat_id(str, p - str, &id)) { + fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); + return -EINVAL; + } + if (id >= FILE_NAME) { + fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str); + return -EINVAL; + } + + p += strlen(op_str); + + if (strcasecmp(p, "true") == 0 || + strcasecmp(p, "t") == 0 || + strcasecmp(p, "success") == 0 || + strcasecmp(p, "succ") == 0 || + strcasecmp(p, "s") == 0) { + val = 1; + } else if (strcasecmp(p, "false") == 0 || + strcasecmp(p, "f") == 0 || + strcasecmp(p, "failure") == 0 || + strcasecmp(p, "fail") == 0) { + val = 0; + } else { + errno = 0; + val = strtol(p, (char **)&end, 10); + if (errno || end == p || *end != '\0') { + fprintf(stderr, "Invalid integer value in '%s'!\n", str); + return -EINVAL; + } + } + + f->kind = FILTER_STAT; + f->stat_id = id; + f->op = operators[i].op_kind; + f->value = val; + + *cnt += 1; + return 0; + } + /* File/prog filter can be specified either as '<glob>' or * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to * both file and program names. This seems to be way more useful in * practice. If user needs full control, they can use '*/<prog-glob>' * form to glob just program name, or '<file-glob>/*' to glob only file * name. But usually common <glob> seems to be the most useful and * ergonomic way. */ + f->kind = FILTER_NAME; p = strchr(str, '/'); if (!p) { f->any_glob = strdup(str); @@ -1317,6 +1424,51 @@ one_more_time: return 0; } +static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats) +{ + long value = stats->stats[f->stat_id]; + + switch (f->op) { + case OP_EQ: return value == f->value; + case OP_NEQ: return value != f->value; + case OP_LT: return value < f->value; + case OP_LE: return value <= f->value; + case OP_GT: return value > f->value; + case OP_GE: return value >= f->value; + } + + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); + return false; +} + +static bool should_output_stats(const struct verif_stats *stats) +{ + struct filter *f; + int i, allow_cnt = 0; + + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_STAT) + continue; + + if (is_stat_filter_matched(f, stats)) + return false; + } + + for (i = 0; i < env.allow_filter_cnt; i++) { + f = &env.allow_filters[i]; + if (f->kind != FILTER_STAT) + continue; + allow_cnt++; + + if (is_stat_filter_matched(f, stats)) + return true; + } + + /* if there are no stat allowed filters, pass everything through */ + return allow_cnt == 0; +} + static void output_prog_stats(void) { const struct verif_stats *stats; @@ -1327,6 +1479,8 @@ static void output_prog_stats(void) output_headers(RESFMT_TABLE_CALCLEN); for (i = 0; i < env.prog_stat_cnt; i++) { stats = &env.prog_stats[i]; + if (!should_output_stats(stats)) + continue; output_stats(stats, RESFMT_TABLE_CALCLEN, false); last_stat_idx = i; } @@ -1336,6 +1490,8 @@ static void output_prog_stats(void) output_headers(env.out_fmt); for (i = 0; i < env.prog_stat_cnt; i++) { stats = &env.prog_stats[i]; + if (!should_output_stats(stats)) + continue; output_stats(stats, env.out_fmt, i == last_stat_idx); } } -- cgit v1.2.3-58-ga151 From 77534401d69c2a35d2a53e599fafb5f0f604e45d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:53:01 -0700 Subject: selftests/bpf: make veristat emit all stats in CSV mode by default Make veristat distinguish between table and CSV
output formats and use different default set of stats (columns) that are emitted. While for human-readable table output it doesn't make sense to output all known stats, it is very useful for CSV mode to record all possible data, so that it can later be queried and filtered in replay or comparison mode. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 37e512d233a7..ec1a8ba7791c 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -501,6 +501,15 @@ static const struct stat_specs default_output_spec = { }, }; +static const struct stat_specs default_csv_output_spec = { + .spec_cnt = 9, + .ids = { + FILE_NAME, PROG_NAME, VERDICT, DURATION, + TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, + MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + }, +}; + static const struct stat_specs default_sort_spec = { .spec_cnt = 2, .ids = { @@ -1561,8 +1570,12 @@ int main(int argc, char **argv) if (env.verbose && env.log_level == 0) env.log_level = 1; - if (env.output_spec.spec_cnt == 0) - env.output_spec = default_output_spec; + if (env.output_spec.spec_cnt == 0) { + if (env.out_fmt == RESFMT_CSV) + env.output_spec = default_csv_output_spec; + else + env.output_spec = default_output_spec; + } if (env.sort_spec.spec_cnt == 0) env.sort_spec = default_sort_spec; -- cgit v1.2.3-58-ga151 From a5710848d824168b5f7f02aa3689e648c46b2e46 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:53:02 -0700 Subject: selftests/bpf: handle missing records in comparison mode better in veristat When comparing two datasets, if either side is missing corresponding record with the same file and prog name, currently veristat emits misleading zeros/failures, and even tried to calculate a difference, even though there is no data to compare against. This patch improves internal logic of handling such situations. Now we'll emit "N/A" in places where data is missing and comparison is non-sensical. 
As an example, in an artificially truncated and mismatched Cilium results, the output looks like below: $ ./veristat -e file,prog,verdict,insns -C ~/base.csv ~/comp.csv File Program Verdict (A) Verdict (B) Verdict (DIFF) Insns (A) Insns (B) Insns (DIFF) ------------------ ------------------------------ ----------- ----------- -------------- --------- --------- -------------- bpf_alignchecker.o __send_drop_notify success N/A N/A 53 N/A N/A bpf_alignchecker.o tail_icmp6_handle_ns failure failure MATCH 33 33 +0 (+0.00%) bpf_alignchecker.o tail_icmp6_send_echo_reply N/A failure N/A N/A 74 N/A bpf_host.o __send_drop_notify success N/A N/A 53 N/A N/A bpf_host.o cil_from_host success N/A N/A 762 N/A N/A bpf_xdp.o __send_drop_notify success success MATCH 151 151 +0 (+0.00%) bpf_xdp.o cil_xdp_entry success success MATCH 423 423 +0 (+0.00%) bpf_xdp.o tail_handle_nat_fwd_ipv4 success success MATCH 21547 20920 -627 (-2.91%) bpf_xdp.o tail_handle_nat_fwd_ipv6 success success MATCH 16974 17039 +65 (+0.38%) bpf_xdp.o tail_lb_ipv4 success success MATCH 71736 73430 +1694 (+2.36%) bpf_xdp.o tail_lb_ipv6 N/A success N/A N/A 151895 N/A bpf_xdp.o tail_nodeport_ipv4_dsr N/A success N/A N/A 1162 N/A bpf_xdp.o tail_nodeport_ipv6_dsr N/A success N/A N/A 1206 N/A bpf_xdp.o tail_nodeport_nat_egress_ipv4 N/A success N/A N/A 15619 N/A bpf_xdp.o tail_nodeport_nat_ingress_ipv4 success success MATCH 7658 7713 +55 (+0.72%) bpf_xdp.o tail_nodeport_nat_ingress_ipv6 success success MATCH 6405 6397 -8 (-0.12%) bpf_xdp.o tail_nodeport_nat_ipv6_egress failure failure MATCH 752 752 +0 (+0.00%) bpf_xdp.o tail_rev_nodeport_lb4 success success MATCH 7126 6934 -192 (-2.69%) bpf_xdp.o tail_rev_nodeport_lb6 success success MATCH 17954 17905 -49 (-0.27%) ------------------ ------------------------------ ----------- ----------- -------------- --------- --------- -------------- Internally veristat now separates joining two datasets and remembering the join, and actually emitting a comparison view. This will come handy when we add support for filtering and custom ordering in comparison mode. 
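Under the hood the join is a classic two-pointer merge over two arrays sorted by the same (file, prog) key. A minimal standalone sketch of the idea (simplified names and types; this is not the actual veristat code):

#include <stdio.h>
#include <string.h>

struct rec { const char *key; long insns; };

/* Merge-join two key-sorted arrays; a missing side is reported as N/A. */
static void join(const struct rec *a, int na, const struct rec *b, int nb)
{
	int i = 0, j = 0;

	while (i < na || j < nb) {
		/* an exhausted side always compares as "greater" */
		int r = i >= na ? 1 : j >= nb ? -1 : strcmp(a[i].key, b[j].key);

		if (r == 0) {		/* present on both sides */
			printf("%s: %ld -> %ld\n", a[i].key, a[i].insns, b[j].insns);
			i++; j++;
		} else if (r < 0) {	/* only in baseline (A) */
			printf("%s: %ld -> N/A\n", a[i].key, a[i].insns);
			i++;
		} else {		/* only in comparison (B) */
			printf("%s: N/A -> %ld\n", b[j].key, b[j].insns);
			j++;
		}
	}
}

int main(void)
{
	const struct rec base[] = { { "f.o/p1", 100 }, { "f.o/p2", 50 } };
	const struct rec comp[] = { { "f.o/p2", 60 }, { "f.o/p3", 10 } };

	join(base, 2, comp, 2); /* p1 -> N/A, p2 matched, N/A -> p3 */
	return 0;
}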
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 147 ++++++++++++++++++++++++--------- 1 file changed, 110 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index ec1a8ba7791c..5a9568a8c0bf 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -41,6 +41,15 @@ struct verif_stats { long stats[NUM_STATS_CNT]; }; +/* joined comparison mode stats */ +struct verif_stats_join { + char *file_name; + char *prog_name; + + const struct verif_stats *stats_a; + const struct verif_stats *stats_b; +}; + struct stat_specs { int spec_cnt; enum stat_id ids[ALL_STATS_CNT]; @@ -97,6 +106,9 @@ static struct env { struct verif_stats *baseline_stats; int baseline_stat_cnt; + struct verif_stats_join *join_stats; + int join_stat_cnt; + struct stat_specs output_spec; struct stat_specs sort_spec; @@ -518,6 +530,15 @@ static const struct stat_specs default_sort_spec = { .asc = { true, true, }, }; +/* sorting for comparison mode to join two data sets */ +static const struct stat_specs join_sort_spec = { + .spec_cnt = 2, + .ids = { + FILE_NAME, PROG_NAME, + }, + .asc = { true, true, }, +}; + static struct stat_def { const char *header; const char *names[4]; @@ -934,13 +955,16 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, { switch (id) { case FILE_NAME: - *str = s->file_name; + *str = s ? s->file_name : "N/A"; break; case PROG_NAME: - *str = s->prog_name; + *str = s ? s->prog_name : "N/A"; break; case VERDICT: - *str = s->stats[VERDICT] ? "success" : "failure"; + if (!s) + *str = "N/A"; + else + *str = s->stats[VERDICT] ? "success" : "failure"; break; case DURATION: case TOTAL_INSNS: @@ -948,7 +972,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case PEAK_STATES: case MAX_STATES_PER_INSN: case MARK_READ_MAX_LEN: - *val = s->stats[id]; + *val = s ? 
s->stats[id] : 0; break; default: fprintf(stderr, "Unrecognized stat #%d\n", id); @@ -1223,9 +1247,11 @@ static void output_comp_headers(enum resfmt fmt) output_comp_header_underlines(); } -static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp, +static void output_comp_stats(const struct verif_stats_join *join_stats, enum resfmt fmt, bool last) { + const struct verif_stats *base = join_stats->stats_a; + const struct verif_stats *comp = join_stats->stats_b; char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {}; int i; @@ -1243,33 +1269,45 @@ static void output_comp_stats(const struct verif_stats *base, const struct verif /* normalize all the outputs to be in string buffers for simplicity */ if (is_key_stat(id)) { /* key stats (file and program name) are always strings */ - if (base != &fallback_stats) + if (base) snprintf(base_buf, sizeof(base_buf), "%s", base_str); else snprintf(base_buf, sizeof(base_buf), "%s", comp_str); } else if (base_str) { snprintf(base_buf, sizeof(base_buf), "%s", base_str); snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str); - if (strcmp(base_str, comp_str) == 0) + if (!base || !comp) + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); + else if (strcmp(base_str, comp_str) == 0) snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH"); else snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH"); } else { double p = 0.0; - snprintf(base_buf, sizeof(base_buf), "%ld", base_val); - snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + if (base) + snprintf(base_buf, sizeof(base_buf), "%ld", base_val); + else + snprintf(base_buf, sizeof(base_buf), "%s", "N/A"); + if (comp) + snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val); + else + snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A"); diff_val = comp_val - base_val; - if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) { - if (comp_val == base_val) - p = 0.0; /* avoid +0 (+100%) case */ - else - p = comp_val < base_val ? -100.0 : 100.0; + if (!base || !comp) { + snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A"); } else { - p = diff_val * 100.0 / base_val; + if (base_val == 0) { + if (comp_val == base_val) + p = 0.0; /* avoid +0 (+100%) case */ + else + p = comp_val < base_val ? -100.0 : 100.0; + } else { + p = diff_val * 100.0 / base_val; + } + snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); } - snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p); } switch (fmt) { @@ -1328,6 +1366,7 @@ static int cmp_stats_key(const struct verif_stats *base, const struct verif_stat static int handle_comparison_mode(void) { struct stat_specs base_specs = {}, comp_specs = {}; + struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; int err, i, j; @@ -1370,31 +1409,26 @@ static int handle_comparison_mode(void) } } + /* Replace user-specified sorting spec with file+prog sorting rule to + * be able to join two datasets correctly. Once we are done, we will + * restore the original sort spec. + */ + tmp_sort_spec = env.sort_spec; + env.sort_spec = join_sort_spec; qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats); qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats); + env.sort_spec = tmp_sort_spec; - /* for human-readable table output we need to do extra pass to - * calculate column widths, so we substitute current output format - * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE - * and do everything again. 
- */ - if (env.out_fmt == RESFMT_TABLE) - cur_fmt = RESFMT_TABLE_CALCLEN; - else - cur_fmt = env.out_fmt; - -one_more_time: - output_comp_headers(cur_fmt); - - /* If baseline and comparison datasets have different subset of rows - * (we match by 'object + prog' as a unique key) then assume - * empty/missing/zero value for rows that are missing in the opposite - * data set + /* Join two datasets together. If baseline and comparison datasets + * have different subset of rows (we match by 'object + prog' as + * a unique key) then assume empty/missing/zero value for rows that + * are missing in the opposite data set. */ i = j = 0; while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) { - bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1); const struct verif_stats *base, *comp; + struct verif_stats_join *join; + void *tmp; int r; base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats; @@ -1411,18 +1445,56 @@ one_more_time: return -EINVAL; } + tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats)); + if (!tmp) + return -ENOMEM; + env.join_stats = tmp; + + join = &env.join_stats[env.join_stat_cnt]; + memset(join, 0, sizeof(*join)); + r = cmp_stats_key(base, comp); if (r == 0) { - output_comp_stats(base, comp, cur_fmt, last); + join->file_name = base->file_name; + join->prog_name = base->prog_name; + join->stats_a = base; + join->stats_b = comp; i++; j++; } else if (comp == &fallback_stats || r < 0) { - output_comp_stats(base, &fallback_stats, cur_fmt, last); + join->file_name = base->file_name; + join->prog_name = base->prog_name; + join->stats_a = base; + join->stats_b = NULL; i++; } else { - output_comp_stats(&fallback_stats, comp, cur_fmt, last); + join->file_name = comp->file_name; + join->prog_name = comp->prog_name; + join->stats_a = NULL; + join->stats_b = comp; j++; } + env.join_stat_cnt += 1; + } + + /* for human-readable table output we need to do extra pass to + * calculate column widths, so we substitute current output format + * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE + * and do everything again. + */ + if (env.out_fmt == RESFMT_TABLE) + cur_fmt = RESFMT_TABLE_CALCLEN; + else + cur_fmt = env.out_fmt; + +one_more_time: + output_comp_headers(cur_fmt); + + for (i = 0; i < env.join_stat_cnt; i++) { + const struct verif_stats_join *join = &env.join_stats[i]; + bool last = i == env.join_stat_cnt - 1; + + output_comp_stats(join, cur_fmt, last); } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1594,6 +1666,7 @@ int main(int argc, char **argv) free_verif_stats(env.prog_stats, env.prog_stat_cnt); free_verif_stats(env.baseline_stats, env.baseline_stat_cnt); + free(env.join_stats); for (i = 0; i < env.filename_cnt; i++) free(env.filenames[i]); free(env.filenames); -- cgit v1.2.3-58-ga151 From fa9bb590c2895b14c9da46ba1860d06efba55657 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:53:03 -0700 Subject: selftests/bpf: support stats ordering in comparison mode in veristat Introduce the concept of "stat variant", by which it's possible to specify whether to use the value from A (baseline) side, B (comparison or control) side, the absolute difference value or relative (percentage) difference value. To support specifying this, veristat recognizes `_a`, `_b`, `_diff`, `_pct` suffixes, which can be appended to stat name(s). 
In non-comparison mode variants are ignored (there is only `_a` variant effectively), if no variant suffix is provided, `_b` is assumed, as control group is of primary interest in comparison mode. These stat variants can be flexibly combined with asc/desc orders. Here's an example of ordering results first by verdict match/mismatch (or n/a if one of the sides is missing; n/a is always considered to be the lowest value), and within each match/mismatch/n/a group further sort by number of instructions in B side. In this case we don't have MISMATCH cases, but N/A are split from MATCH, demonstrating this custom ordering. $ ./veristat -e file,prog,verdict,insns -s verdict_diff,insns_b_ -C ~/base.csv ~/comp.csv File Program Verdict (A) Verdict (B) Verdict (DIFF) Insns (A) Insns (B) Insns (DIFF) ------------------ ------------------------------ ----------- ----------- -------------- --------- --------- -------------- bpf_xdp.o tail_lb_ipv6 N/A success N/A N/A 151895 N/A bpf_xdp.o tail_nodeport_nat_egress_ipv4 N/A success N/A N/A 15619 N/A bpf_xdp.o tail_nodeport_ipv6_dsr N/A success N/A N/A 1206 N/A bpf_xdp.o tail_nodeport_ipv4_dsr N/A success N/A N/A 1162 N/A bpf_alignchecker.o tail_icmp6_send_echo_reply N/A failure N/A N/A 74 N/A bpf_alignchecker.o __send_drop_notify success N/A N/A 53 N/A N/A bpf_host.o __send_drop_notify success N/A N/A 53 N/A N/A bpf_host.o cil_from_host success N/A N/A 762 N/A N/A bpf_xdp.o tail_lb_ipv4 success success MATCH 71736 73430 +1694 (+2.36%) bpf_xdp.o tail_handle_nat_fwd_ipv4 success success MATCH 21547 20920 -627 (-2.91%) bpf_xdp.o tail_rev_nodeport_lb6 success success MATCH 17954 17905 -49 (-0.27%) bpf_xdp.o tail_handle_nat_fwd_ipv6 success success MATCH 16974 17039 +65 (+0.38%) bpf_xdp.o tail_nodeport_nat_ingress_ipv4 success success MATCH 7658 7713 +55 (+0.72%) bpf_xdp.o tail_rev_nodeport_lb4 success success MATCH 7126 6934 -192 (-2.69%) bpf_xdp.o tail_nodeport_nat_ingress_ipv6 success success MATCH 6405 6397 -8 (-0.12%) bpf_xdp.o tail_nodeport_nat_ipv6_egress failure failure MATCH 752 752 +0 (+0.00%) bpf_xdp.o cil_xdp_entry success success MATCH 423 423 +0 (+0.00%) bpf_xdp.o __send_drop_notify success success MATCH 151 151 +0 (+0.00%) bpf_alignchecker.o tail_icmp6_handle_ns failure failure MATCH 33 33 +0 (+0.00%) ------------------ ------------------------------ ----------- ----------- -------------- --------- --------- -------------- Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 192 +++++++++++++++++++++++++++++++-- 1 file changed, 182 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 5a9568a8c0bf..f2ea825ee80a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -17,6 +17,7 @@ #include #include #include +#include enum stat_id { VERDICT, @@ -34,6 +35,45 @@ enum stat_id { NUM_STATS_CNT = FILE_NAME - VERDICT, }; +/* In comparison mode each stat can specify up to four different values: + * - A side value; + * - B side value; + * - absolute diff value; + * - relative (percentage) diff value. 
+ * + * When specifying stat specs in comparison mode, user can use one of the + * following variant suffixes to specify which exact variant should be used for + * ordering or filtering: + * - `_a` for A side value; + * - `_b` for B side value; + * - `_diff` for absolute diff value; + * - `_pct` for relative (percentage) diff value. + * + * If no variant suffix is provided, then `_b` (control data) is assumed. + * + * As an example, let's say instructions stat has the following output: + * + * Insns (A) Insns (B) Insns (DIFF) + * --------- --------- -------------- + * 21547 20920 -627 (-2.91%) + * + * Then: + * - 21547 is A side value (insns_a); + * - 20920 is B side value (insns_b); + * - -627 is absolute diff value (insns_diff); + * - -2.91% is relative diff value (insns_pct). + * + * For verdict there is no verdict_pct variant. + * For file and program name, _a and _b variants are equivalent and there are + * no _diff or _pct variants. + */ +enum stat_variant { + VARIANT_A, + VARIANT_B, + VARIANT_DIFF, + VARIANT_PCT, +}; + struct verif_stats { char *file_name; char *prog_name; @@ -53,6 +93,7 @@ struct verif_stats_join { struct stat_specs { int spec_cnt; enum stat_id ids[ALL_STATS_CNT]; + enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -86,6 +127,7 @@ struct filter { /* FILTER_STAT */ enum operator_kind op; int stat_id; + enum stat_variant stat_var; long value; }; @@ -360,7 +402,7 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id(const char *name, size_t len, int *id); +static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -388,6 +430,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) * glob filter. */ for (i = 0; i < ARRAY_SIZE(operators); i++) { + enum stat_variant var; int id; long val; const char *end = str; @@ -398,7 +441,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) if (!p) continue; - if (!parse_stat_id(str, p - str, &id)) { + if (!parse_stat_id_var(str, p - str, &id, &var)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -431,6 +474,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->kind = FILTER_STAT; f->stat_id = id; + f->stat_var = var; f->op = operators[i].op_kind; f->value = val; @@ -556,22 +600,52 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id(const char *name, size_t len, int *id) +static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) { - int i, j; + static const char *var_sfxs[] = { + [VARIANT_A] = "_a", + [VARIANT_B] = "_b", + [VARIANT_DIFF] = "_diff", + [VARIANT_PCT] = "_pct", + }; + int i, j, k; for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; + size_t alias_len, sfx_len; + const char *alias; for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) { + alias = def->names[j]; + if (!alias) + continue; - if (!def->names[j] || - strlen(def->names[j]) != len || - strncmp(def->names[j], name, len) != 0) + alias_len = strlen(alias); + if (strncmp(name, alias, alias_len) != 0) continue; - *id = i; - return true; + if (alias_len == len) { + /* If no variant suffix is specified, we + * assume control group (just in case we are + * in comparison mode. 
Variant is ignored in + * non-comparison mode. + */ + *var = VARIANT_B; + *id = i; + return true; + } + + for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) { + sfx_len = strlen(var_sfxs[k]); + if (alias_len + sfx_len != len) + continue; + + if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) { + *var = (enum stat_variant)k; + *id = i; + return true; + } + } } } @@ -593,6 +667,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) int id; bool has_order = false, is_asc = false; size_t len = strlen(stat_name); + enum stat_variant var; if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) { fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids)); @@ -605,12 +680,13 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id(stat_name, len, &id)) { + if (!parse_stat_id_var(stat_name, len, &id, &var)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } specs->ids[specs->spec_cnt] = id; + specs->variants[specs->spec_cnt] = var; specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; specs->spec_cnt++; @@ -900,6 +976,99 @@ static int cmp_prog_stats(const void *v1, const void *v2) return strcmp(s1->prog_name, s2->prog_name); } +static void fetch_join_stat_value(const struct verif_stats_join *s, + enum stat_id id, enum stat_variant var, + const char **str_val, + double *num_val) +{ + long v1, v2; + + if (id == FILE_NAME) { + *str_val = s->file_name; + return; + } + if (id == PROG_NAME) { + *str_val = s->prog_name; + return; + } + + v1 = s->stats_a ? s->stats_a->stats[id] : 0; + v2 = s->stats_b ? s->stats_b->stats[id] : 0; + + switch (var) { + case VARIANT_A: + if (!s->stats_a) + *num_val = -DBL_MAX; + else + *num_val = s->stats_a->stats[id]; + return; + case VARIANT_B: + if (!s->stats_b) + *num_val = -DBL_MAX; + else + *num_val = s->stats_b->stats[id]; + return; + case VARIANT_DIFF: + if (!s->stats_a || !s->stats_b) + *num_val = -DBL_MAX; + else + *num_val = (double)(v2 - v1); + return; + case VARIANT_PCT: + if (!s->stats_a || !s->stats_b) { + *num_val = -DBL_MAX; + } else if (v1 == 0) { + if (v1 == v2) + *num_val = 0.0; + else + *num_val = v2 < v1 ? -100.0 : 100.0; + } else { + *num_val = (v2 - v1) * 100.0 / v1; + } + return; + } +} + +static int cmp_join_stat(const struct verif_stats_join *s1, + const struct verif_stats_join *s2, + enum stat_id id, enum stat_variant var, bool asc) +{ + const char *str1 = NULL, *str2 = NULL; + double v1, v2; + int cmp = 0; + + fetch_join_stat_value(s1, id, var, &str1, &v1); + fetch_join_stat_value(s2, id, var, &str2, &v2); + + if (str1) + cmp = strcmp(str1, str2); + else if (v1 != v2) + cmp = v1 < v2 ? -1 : 1; + + return asc ? 
cmp : -cmp; +} + +static int cmp_join_stats(const void *v1, const void *v2) +{ + const struct verif_stats_join *s1 = v1, *s2 = v2; + int i, cmp; + + for (i = 0; i < env.sort_spec.spec_cnt; i++) { + cmp = cmp_join_stat(s1, s2, + env.sort_spec.ids[i], + env.sort_spec.variants[i], + env.sort_spec.asc[i]); + if (cmp != 0) + return cmp; + } + + /* always disambiguate with file+prog, which are unique */ + cmp = strcmp(s1->file_name, s2->file_name); + if (cmp != 0) + return cmp; + return strcmp(s1->prog_name, s2->prog_name); +} + #define HEADER_CHAR '-' #define COLUMN_SEP " " @@ -1477,6 +1646,9 @@ static int handle_comparison_mode(void) env.join_stat_cnt += 1; } + /* now sort joined results accorsing to sort spec */ + qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); + /* for human-readable table output we need to do extra pass to * calculate column widths, so we substitute current output format * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE -- cgit v1.2.3-58-ga151 From d5ce4b89234156d66ac8a59bddbc341667aadf86 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 22:53:04 -0700 Subject: selftests/bpf: support stat filtering in comparison mode in veristat Finally add support for filtering stats values, similar to non-comparison mode filtering. For comparison mode 4 variants of stats are important for filtering, as they allow to filter either A or B side, but even more importantly they allow to filter based on value difference, and for verdict stat value difference is MATCH/MISMATCH classification. So with these changes it's finally possible to easily check if there were any mismatches between failure/success outcomes on two separate data sets. Like in an example below: $ ./veristat -e file,prog,verdict,insns -C ~/baseline-results.csv ~/shortest-results.csv -f verdict_diff=mismatch File Program Verdict (A) Verdict (B) Verdict (DIFF) Insns (A) Insns (B) Insns (DIFF) ------------------------------------- --------------------- ----------- ----------- -------------- --------- --------- ------------------- dynptr_success.bpf.linked1.o test_data_slice success failure MISMATCH 85 0 -85 (-100.00%) dynptr_success.bpf.linked1.o test_read_write success failure MISMATCH 1992 0 -1992 (-100.00%) dynptr_success.bpf.linked1.o test_ringbuf success failure MISMATCH 74 0 -74 (-100.00%) kprobe_multi.bpf.linked1.o test_kprobe failure success MISMATCH 0 246 +246 (+100.00%) kprobe_multi.bpf.linked1.o test_kprobe_manual failure success MISMATCH 0 246 +246 (+100.00%) kprobe_multi.bpf.linked1.o test_kretprobe failure success MISMATCH 0 248 +248 (+100.00%) kprobe_multi.bpf.linked1.o test_kretprobe_manual failure success MISMATCH 0 248 +248 (+100.00%) kprobe_multi.bpf.linked1.o trigger failure success MISMATCH 0 2 +2 (+100.00%) netcnt_prog.bpf.linked1.o bpf_nextcnt failure success MISMATCH 0 56 +56 (+100.00%) pyperf600_nounroll.bpf.linked1.o on_event success failure MISMATCH 568128 1000001 +431873 (+76.02%) ringbuf_bench.bpf.linked1.o bench_ringbuf success failure MISMATCH 8 0 -8 (-100.00%) strobemeta.bpf.linked1.o on_event success failure MISMATCH 557149 1000001 +442852 (+79.49%) strobemeta_nounroll1.bpf.linked1.o on_event success failure MISMATCH 57240 1000001 +942761 (+1647.03%) strobemeta_nounroll2.bpf.linked1.o on_event success failure MISMATCH 501725 1000001 +498276 (+99.31%) strobemeta_subprogs.bpf.linked1.o on_event success failure MISMATCH 65420 1000001 +934581 (+1428.59%) test_map_in_map_invalid.bpf.linked1.o xdp_noop0 success failure MISMATCH 2 0 
-2 (-100.00%) test_mmap.bpf.linked1.o test_mmap success failure MISMATCH 46 0 -46 (-100.00%) test_verif_scale3.bpf.linked1.o balancer_ingress success failure MISMATCH 845499 1000001 +154502 (+18.27%) ------------------------------------- --------------------- ----------- ----------- -------------- --------- --------- ------------------- Note that by filtering on verdict_diff=mismatch, it's now extremely easy and fast to see any changes in verdict. Example above showcases both failure -> success transitions (which are generally surprising) and success -> failure transitions (which are expected if bugs are present). Given veristat allows to query relative percent difference values, internal logic for comparison mode is based on floating point numbers, so requires a bit of epsilon precision logic, deviating from typical integer simple handling rules. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221103055304.2904589-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 70 +++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index f2ea825ee80a..9e3811ab4866 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -456,12 +456,16 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) strcasecmp(p, "t") == 0 || strcasecmp(p, "success") == 0 || strcasecmp(p, "succ") == 0 || - strcasecmp(p, "s") == 0) { + strcasecmp(p, "s") == 0 || + strcasecmp(p, "match") == 0 || + strcasecmp(p, "m") == 0) { val = 1; } else if (strcasecmp(p, "false") == 0 || strcasecmp(p, "f") == 0 || strcasecmp(p, "failure") == 0 || - strcasecmp(p, "fail") == 0) { + strcasecmp(p, "fail") == 0 || + strcasecmp(p, "mismatch") == 0 || + strcasecmp(p, "mis") == 0) { val = 0; } else { errno = 0; @@ -1011,6 +1015,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s, case VARIANT_DIFF: if (!s->stats_a || !s->stats_b) *num_val = -DBL_MAX; + else if (id == VERDICT) + *num_val = v1 == v2 ? 
1.0 /* MATCH */ : 0.0 /* MISMATCH */; else *num_val = (double)(v2 - v1); return; @@ -1532,12 +1538,61 @@ static int cmp_stats_key(const struct verif_stats *base, const struct verif_stat return strcmp(base->prog_name, comp->prog_name); } +static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats) +{ + static const double eps = 1e-9; + const char *str = NULL; + double value = 0.0; + + fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + + switch (f->op) { + case OP_EQ: return value > f->value - eps && value < f->value + eps; + case OP_NEQ: return value < f->value - eps || value > f->value + eps; + case OP_LT: return value < f->value - eps; + case OP_LE: return value <= f->value + eps; + case OP_GT: return value > f->value + eps; + case OP_GE: return value >= f->value - eps; + } + + fprintf(stderr, "BUG: unknown filter op %d!\n", f->op); + return false; +} + +static bool should_output_join_stats(const struct verif_stats_join *stats) +{ + struct filter *f; + int i, allow_cnt = 0; + + for (i = 0; i < env.deny_filter_cnt; i++) { + f = &env.deny_filters[i]; + if (f->kind != FILTER_STAT) + continue; + + if (is_join_stat_filter_matched(f, stats)) + return false; + } + + for (i = 0; i < env.allow_filter_cnt; i++) { + f = &env.allow_filters[i]; + if (f->kind != FILTER_STAT) + continue; + allow_cnt++; + + if (is_join_stat_filter_matched(f, stats)) + return true; + } + + /* if there are no stat allowed filters, pass everything through */ + return allow_cnt == 0; +} + static int handle_comparison_mode(void) { struct stat_specs base_specs = {}, comp_specs = {}; struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j; + int err, i, j, last_idx; if (env.filename_cnt != 2) { fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); @@ -1664,9 +1719,14 @@ one_more_time: for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i]; - bool last = i == env.join_stat_cnt - 1; - output_comp_stats(join, cur_fmt, last); + if (!should_output_join_stats(join)) + continue; + + if (cur_fmt == RESFMT_TABLE_CALCLEN) + last_idx = i; + + output_comp_stats(join, cur_fmt, i == last_idx); } if (cur_fmt == RESFMT_TABLE_CALCLEN) { -- cgit v1.2.3-58-ga151 From 4f999b767769b76378c3616c624afd6f4bb0d99f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 4 Nov 2022 09:36:49 -0700 Subject: selftests/bpf: make test_align selftest more robust test_align selftest relies on BPF verifier log emitting register states for specific instructions in expected format. Unfortunately, BPF verifier precision backtracking log interferes with such expectations. And instructions on which precision propagation happens sometimes don't output full expected register states. This does indeed look like something to be improved in BPF verifier, but is beyond the scope of this patch set. So to make test_align a bit more robust, inject a few dummy R4 = R5 instructions which capture desired state of R5 and won't have precision tracking logs on them. This fixes tests until we can improve BPF verifier output in the presence of precision tracking.
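Schematically, each problematic spot in the test programs now looks like this (illustrative fragment only; the real changes are in the diff below):

	BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),	/* variable offset added to R5 */
	BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),		/* dummy copy: snapshots R5's state in R4 */

The expected-output matchers can then check the R4/R5 state on the copy instruction, whose log line is not affected by precision backtracking.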
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221104163649.121784-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/align.c | 38 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 970f09156eb4..4666f88f2bb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -2,7 +2,7 @@ #include #define MAX_INSNS 512 -#define MAX_MATCHES 16 +#define MAX_MATCHES 24 struct bpf_reg_match { unsigned int line; @@ -267,6 +267,7 @@ static struct bpf_align_test tests[] = { */ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), @@ -280,6 +281,7 @@ static struct bpf_align_test tests[] = { BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), @@ -311,44 +313,52 @@ static struct bpf_align_test tests[] = { {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Variable offset is added to R5 packet pointer, - * resulting in auxiliary alignment of 4. + * resulting in auxiliary alignment of 4. To avoid BPF + * verifier's precision backtracking logging + * interfering we also have a no-op R4 = R5 + * instruction to validate R5 state. We also check + * that R4 is what it should be in such case. */ - {17, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {18, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {23, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {23, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {25, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w=pkt(off=14,r=8"}, /* Variable offset is added to R5, resulting in a - * variable offset of (4n). + * variable offset of (4n). See comment for insn #18 + * for R4 = R5 trick. */ - {26, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. 
*/ - {27, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {28, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {33, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {33, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, }, }, { @@ -681,6 +691,6 @@ void test_align(void) if (!test__start_subtest(test->descr)) continue; - CHECK_FAIL(do_test_single(test)); + ASSERT_OK(do_test_single(test), test->descr); } } -- cgit v1.2.3-58-ga151 From de048b6ee86597c6079a5ed3a1cca996089a83d3 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 2 Nov 2022 01:54:12 +0200 Subject: libbpf: Resolve enum fwd as full enum64 and vice versa Changes de-duplication logic for enums in the following way: - update btf_hash_enum to ignore size and kind fields to get ENUM and ENUM64 types in a same hash bucket; - update btf_compat_enum to consider enum fwd to be compatible with full enum64 (and vice versa); This allows BTF de-duplication in the following case: // CU #1 enum foo; struct s { enum foo *a; } *x; // CU #2 enum foo { x = 0xfffffffff // big enough to force enum64 }; struct s { enum foo *a; } *y; De-duplicated BTF prior to this commit: [1] ENUM64 'foo' encoding=UNSIGNED size=8 vlen=1 'x' val=68719476735ULL [2] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) [3] STRUCT 's' size=8 vlen=1 'a' type_id=4 bits_offset=0 [4] PTR '(anon)' type_id=1 [5] PTR '(anon)' type_id=3 [6] STRUCT 's' size=8 vlen=1 'a' type_id=8 bits_offset=0 [7] ENUM 'foo' encoding=UNSIGNED size=4 vlen=0 [8] PTR '(anon)' type_id=7 [9] PTR '(anon)' type_id=6 De-duplicated BTF after this commit: [1] ENUM64 'foo' encoding=UNSIGNED size=8 vlen=1 'x' val=68719476735ULL [2] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) [3] STRUCT 's' size=8 vlen=1 'a' type_id=4 bits_offset=0 [4] PTR '(anon)' type_id=1 [5] PTR '(anon)' type_id=3 Enum forward declarations in C do not provide information about enumeration values range. Thus the `btf_type->size` field is meaningless for forward enum declarations. In fact, GCC does not encode size in DWARF for forward enum declarations (but dwarves sets enumeration size to a default value of `sizeof(int) * 8` when size is not specified see dwarf_loader.c:die__create_new_enumeration). 
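For reference, de-duplicated BTF listings like the ones above can be inspected for any object file with bpftool (assuming a bpftool build with BTF support; 'prog.bpf.o' is a placeholder name):

$ bpftool btf dump file prog.bpf.o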
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221101235413.1824260-1-eddyz87@gmail.com --- tools/lib/bpf/btf.c | 75 ++++++++++++++++++----------------------------------- 1 file changed, 25 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 675a0df5c840..71d68bf7788c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -3404,23 +3404,17 @@ static long btf_hash_enum(struct btf_type *t) { long h; - /* don't hash vlen and enum members to support enum fwd resolving */ + /* don't hash vlen, enum members and size to support enum fwd resolving */ h = hash_combine(0, t->name_off); - h = hash_combine(h, t->info & ~0xffff); - h = hash_combine(h, t->size); return h; } -/* Check structural equality of two ENUMs. */ -static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum *m1, *m2; __u16 vlen; int i; - if (!btf_equal_common(t1, t2)) - return false; - vlen = btf_vlen(t1); m1 = btf_enum(t1); m2 = btf_enum(t2); @@ -3433,15 +3427,12 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } -static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) +static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum64 *m1, *m2; __u16 vlen; int i; - if (!btf_equal_common(t1, t2)) - return false; - vlen = btf_vlen(t1); m1 = btf_enum64(t1); m2 = btf_enum64(t2); @@ -3455,6 +3446,19 @@ static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) return true; } +/* Check structural equality of two ENUMs or ENUM64s. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_equal_common(t1, t2)) + return false; + + /* t1 & t2 kinds are identical because of btf_equal_common */ + if (btf_kind(t1) == BTF_KIND_ENUM) + return btf_equal_enum_members(t1, t2); + else + return btf_equal_enum64_members(t1, t2); +} + static inline bool btf_is_enum_fwd(struct btf_type *t) { return btf_is_any_enum(t) && btf_vlen(t) == 0; @@ -3464,21 +3468,14 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) { if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) return btf_equal_enum(t1, t2); - /* ignore vlen when comparing */ - return t1->name_off == t2->name_off && - (t1->info & ~0xffff) == (t2->info & ~0xffff) && - t1->size == t2->size; -} - -static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2) -{ - if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) - return btf_equal_enum64(t1, t2); - - /* ignore vlen when comparing */ + /* At this point either t1 or t2 or both are forward declarations, thus: + * - skip comparing vlen because it is zero for forward declarations; + * - skip comparing size to allow enum forward declarations + * to be compatible with enum64 full declarations; + * - skip comparing kind for the same reason. 
+ */ return t1->name_off == t2->name_off && - (t1->info & ~0xffff) == (t2->info & ~0xffff) && - t1->size == t2->size; + btf_is_any_enum(t1) && btf_is_any_enum(t2); } /* @@ -3763,6 +3760,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: h = btf_hash_enum(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; @@ -3783,27 +3781,6 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) } break; - case BTF_KIND_ENUM64: - h = btf_hash_enum(t); - for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; - cand = btf_type_by_id(d->btf, cand_id); - if (btf_equal_enum64(t, cand)) { - new_id = cand_id; - break; - } - if (btf_compat_enum64(t, cand)) { - if (btf_is_enum_fwd(t)) { - /* resolve fwd to full enum */ - new_id = cand_id; - break; - } - /* resolve canonical enum fwd to full enum */ - d->map[cand_id] = type_id; - } - } - break; - case BTF_KIND_FWD: case BTF_KIND_FLOAT: h = btf_hash_common(t); @@ -4099,10 +4076,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: - return btf_compat_enum(cand_type, canon_type); - case BTF_KIND_ENUM64: - return btf_compat_enum64(cand_type, canon_type); + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: case BTF_KIND_FLOAT: -- cgit v1.2.3-58-ga151 From 2e20f50ff84903964bcfca10ecdab0fa08cd6a4c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 2 Nov 2022 01:54:13 +0200 Subject: selftests/bpf: Tests for enum fwd resolved as full enum64 A set of test cases to verify enum fwd resolution logic: - verify that enum fwd can be resolved as full enum64; - verify that enum64 fwd can be resolved as full enum; - verify that enum size is considered when enums are compared for equivalence. 
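At the C source level, the first scenario corresponds to roughly the following (illustrative source only; the tests below encode the equivalent raw BTF directly):

// CU #1
enum e1;			/* fwd: vlen == 0, size is meaningless */
typedef enum e1 td;

// CU #2
enum e1 {
	e1_val = 0x1ffffffff	/* wide enough to force enum64 */
};

After de-duplication the typedef is expected to reference the single remaining full enum64 definition.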
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221101235413.1824260-2-eddyz87@gmail.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 88 ++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 24dd6214394e..c3e1cea9abae 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -7133,7 +7133,7 @@ static struct btf_dedup_test dedup_tests[] = { BTF_ENUM_ENC(NAME_NTH(4), 456), /* [4] fwd enum 'e2' after full enum */ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), - /* [5] incompatible fwd enum with different size */ + /* [5] fwd enum with different size, size does not matter for fwd */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), /* [6] incompatible full enum with different value */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), @@ -7150,9 +7150,7 @@ static struct btf_dedup_test dedup_tests[] = { /* [2] full enum 'e2' */ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), BTF_ENUM_ENC(NAME_NTH(4), 456), - /* [3] incompatible fwd enum with different size */ - BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), - /* [4] incompatible full enum with different value */ + /* [3] incompatible full enum with different value */ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), BTF_ENUM_ENC(NAME_NTH(2), 321), BTF_END_RAW, @@ -7611,7 +7609,87 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0e1\0e1_val"), }, }, - +{ + .descr = "dedup: enum of different size: no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2), + BTF_ENUM_ENC(NAME_NTH(2), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2), + BTF_ENUM_ENC(NAME_NTH(2), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum fwd to enum64", + .input = { + .raw_types = { + /* [1] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + /* [2] enum 'e1' fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [3] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, + .expect = { + .raw_types = { + /* [1] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + /* [2] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, +}, +{ + .descr = "dedup: enum64 fwd to enum", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [3] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), 
BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] typedef enum 'e1' td */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0td"), + }, +}, }; static int btf_type_size(const struct btf_type *t) -- cgit v1.2.3-58-ga151 From b3c09fdca1135167bd0682128cd51864c5aa5625 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Fri, 4 Nov 2022 09:27:54 +0800 Subject: selftests/bpf: cgroup_helpers.c: Fix strncpy() fortify warning Copy libbpf_strlcpy() from libbpf_internal.h to bpf_util.h, rename it to bpf_strlcpy(), and then replace the selftests' strncpy()/libbpf_strlcpy() calls with bpf_strlcpy(), fixing the compile warning below. The libbpf_internal.h header cannot be used directly here, because references to cgroup_helpers.c in samples/bpf will generate compilation errors. We also can't add libbpf_strlcpy() directly to bpf_util.h, because libbpf_strlcpy() would then be defined twice in files that also include libbpf_internal.h. In order not to modify the libbpf code, add a new function bpf_strlcpy() to the selftests' bpf_util.h. How to reproduce this compilation warning: $ make -C samples/bpf cgroup_helpers.c: In function ‘__enable_controllers’: cgroup_helpers.c:80:17: warning: ‘strncpy’ specified bound 4097 equals destination size [-Wstringop-truncation] 80 | strncpy(enable, controllers, sizeof(enable)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Rong Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/tencent_469D8AF32BD56816A29981BED06E96D22506@qq.com --- tools/testing/selftests/bpf/bpf_util.h | 19 +++++++++++++++++++ tools/testing/selftests/bpf/cgroup_helpers.c | 3 ++- tools/testing/selftests/bpf/xsk.c | 26 ++++---------------------- 3 files changed, 25 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index a3352a64c067..10587a29b967 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -20,6 +20,25 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code.
+ */ +static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + #define __bpf_percpu_val_align __attribute__((__aligned__(8))) #define BPF_DECLARE_PERCPU(type, name) \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e914cc45b766..dd1aa5afcf5a 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -13,6 +13,7 @@ #include #include "cgroup_helpers.h" +#include "bpf_util.h" /* * To avoid relying on the system setup, when setup_cgroup_env is called @@ -77,7 +78,7 @@ static int __enable_controllers(const char *cgroup_path, const char *controllers enable[len] = 0; close(fd); } else { - strncpy(enable, controllers, sizeof(enable)); + bpf_strlcpy(enable, controllers, sizeof(enable)); } snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index 0b3ff49c740d..39d349509ba4 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -33,6 +33,7 @@ #include #include #include "xsk.h" +#include "bpf_util.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -521,25 +522,6 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk) return 0; } -/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst - * is zero-terminated string no matter what (unless sz == 0, in which case - * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs - * in what is returned. Given this is internal helper, it's trivial to extend - * this, when necessary. Use this instead of strncpy inside libbpf source code. - */ -static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) -{ - size_t i; - - if (sz == 0) - return; - - sz--; - for (i = 0; i < sz && src[i]; i++) - dst[i] = src[i]; - dst[i] = '\0'; -} - static int xsk_get_max_queues(struct xsk_socket *xsk) { struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; @@ -552,7 +534,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) return -errno; ifr.ifr_data = (void *)&channels; - libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); + bpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; @@ -771,7 +753,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) } ctx->ifindex = ifindex; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); xsk->ctx = ctx; xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); @@ -958,7 +940,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->refcount = 1; ctx->umem = umem; ctx->queue_id = queue_id; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); ctx->fill = fill; ctx->comp = comp; -- cgit v1.2.3-58-ga151 From 61fc5e66f755db24d27ba37ce1ee4873def1a074 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Fri, 4 Nov 2022 10:40:16 +0100 Subject: selftests/bpf: Use consistent build-id type for liburandom_read.so lld produces "fast" style build-ids by default, which is inconsistent with ld's "sha1" style. Explicitly specify build-id style to be "sha1" when linking liburandom_read.so the same way it is already done for urandom_read. 
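For reference, which style a linker emitted can be checked with readelf (a sketch, assuming binutils is available; the path is an example):

  $ readelf -n liburandom_read.so | grep 'Build ID'

lld's default "fast" style yields an 8-byte note (16 hex digits), while --build-id=sha1 yields the 20-byte note (40 hex digits), matching what urandom_read already uses.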
Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20221104094016.102049-1-asavkov@redhat.com --- tools/testing/selftests/bpf/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 79edef1dbda4..f3cd17026ee5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -182,14 +182,15 @@ endif $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ - -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ liburandom_read.so $(LDLIBS) \ - -fuse-ld=$(LLD) -Wl,-znoseparate-code \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -Wl,-rpath=. -o $@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) -- cgit v1.2.3-58-ga151 From e8f50c4f0c14214b6430b8b3493654a286681a2b Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Sun, 6 Nov 2022 03:36:56 +0900 Subject: selftests/bpf: Fix u32 variable compared with less than zero The variable ret is compared with less than zero even though it was declared as u32, so the comparison is always false. Convert ret from u32 to int. Signed-off-by: Kang Minchul Acked-by: Randy Dunlap Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20221105183656.86077-1-tegongkang@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/xskxceiver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 681a5db80dae..162d3a516f2c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -1006,7 +1006,8 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd { struct xsk_socket_info *xsk = ifobject->xsk; bool use_poll = ifobject->use_poll; - u32 i, idx = 0, ret, valid_pkts = 0; + u32 i, idx = 0, valid_pkts = 0; + int ret; while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { -- cgit v1.2.3-58-ga151 From da23a713d1de1f07675fd96418c0dd060c3633b6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Nov 2022 11:47:18 +0100 Subject: selftests: devlink_lib: Split out helper Merely checking whether a trap counter incremented or not, without logging a test result, is useful on its own. Split this functionality into a helper which will be used by subsequent patches.
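As an illustration, later tests can now pair the helper with either check_err or check_fail (a sketch following the library's existing conventions; the trap name and packet are examples only):

  devlink_trap_stats_check "source_mac_is_multicast" \
          $MZ $h1 -c 1 -a 01:02:03:04:05:06 -b $(mac_get $h2) -p 100 -q
  check_err $? "Trap stats did not increase"

A negative test would use check_fail instead, asserting that the counter stayed flat.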
Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 601990c6881b..f1de525cfa55 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -503,25 +503,30 @@ devlink_trap_drop_cleanup() tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } -devlink_trap_stats_test() +devlink_trap_stats_check() { - local test_name=$1; shift local trap_name=$1; shift local send_one="$@" local t0_packets local t1_packets - RET=0 - t0_packets=$(devlink_trap_rx_packets_get $trap_name) $send_one && sleep 1 t1_packets=$(devlink_trap_rx_packets_get $trap_name) - if [[ $t1_packets -eq $t0_packets ]]; then - check_err 1 "Trap stats did not increase" - fi + [[ $t1_packets -ne $t0_packets ]] +} + +devlink_trap_stats_test() +{ + local test_name=$1; shift + + RET=0 + + devlink_trap_stats_check "$@" + check_err $? "Trap stats did not increase" log_test "$test_name" } -- cgit v1.2.3-58-ga151 From 25a26f0c2015be5e23ac467ba825085aaa01a0f0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Nov 2022 11:47:19 +0100 Subject: selftests: mlxsw: Add a test for EAPOL trap Test that packets with a destination MAC of 01:80:C2:00:00:03 trigger the "eapol" packet trap. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/devlink_trap_control.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh index d3a891d421ab..64153bbf95df 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -83,6 +83,7 @@ ALL_TESTS=" ptp_general_test flow_action_sample_test flow_action_trap_test + eapol_test " NUM_NETIFS=4 source $lib_dir/lib.sh @@ -677,6 +678,27 @@ flow_action_trap_test() tc qdisc del dev $rp1 clsact } +eapol_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:03:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:8E:"$( : ETH type + ) + echo $p +} + +eapol_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \ + $(eapol_payload_get $h1mac) -p 100 -q +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3-58-ga151 From fb398432db2f66f46714b855354c79223649e3ab Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Nov 2022 11:47:20 +0100 Subject: selftests: mlxsw: Add a test for locked port trap Test that packets received via a locked bridge port trigger the "locked_port" packet trap when their {SMAC, VID} does not appear in the bridge's FDB, or appears there with a different port.
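For context, a minimal sketch of the condition the test creates (the devlink handle is a placeholder): with learning disabled and the port locked, traffic from an unknown SMAC has no matching FDB entry and can be trapped to the CPU:

  bridge link set dev $swp1 learning off
  bridge link set dev $swp1 locked on
  devlink trap set pci/0000:03:00.0 trap locked_port action trap

The test then uses devlink_trap_stats_check to verify that the counter moves only while the port is locked and the trap action is "trap".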
Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/devlink_trap_l2_drops.sh | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index a4c2812e9807..8d4b2c6265b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -14,6 +14,7 @@ ALL_TESTS=" ingress_stp_filter_test port_list_is_empty_test port_loopback_filter_test + locked_port_test " NUM_NETIFS=4 source $lib_dir/tc_common.sh @@ -420,6 +421,110 @@ port_loopback_filter_test() port_loopback_filter_uc_test } +locked_port_miss_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after adding an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + log_test "Locked port - FDB miss" + + devlink_trap_action_set $trap_name "drop" + bridge link set dev $swp1 learning on +} + +locked_port_mismatch_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + bridge fdb replace $smac dev $swp2 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? 
"Trap stats increased after unlocking port" + + bridge link set dev $swp1 locked on + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after replacing an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + devlink_trap_action_set $trap_name "drop" + + log_test "Locked port - FDB mismatch" + + bridge link set dev $swp1 locked off + bridge link set dev $swp1 learning on +} + +locked_port_test() +{ + locked_port_miss_test + locked_port_mismatch_test +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3-58-ga151 From cdbde7edf0e53b643b1323e05bc1ca99661f6bcd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Nov 2022 11:47:21 +0100 Subject: selftests: mlxsw: Add a test for invalid locked bridge port configurations Test that locked bridge port configurations that are not supported by mlxsw are rejected. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 04f03ae9d8fb..5e89657857c7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -34,6 +34,7 @@ ALL_TESTS=" nexthop_obj_bucket_offload_test nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test + bridge_locked_port_test devlink_reload_test " NUM_NETIFS=2 @@ -917,6 +918,36 @@ nexthop_obj_route_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +bridge_locked_port_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link set dev $swp1.10 master br1 + + bridge link set dev $swp1.10 locked on + check_fail $? "managed to set locked flag on a VLAN upper" + + ip link set dev $swp1.10 nomaster + ip link set dev $swp1 master br1 + + bridge link set dev $swp1 locked on + check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper" + + ip link del dev $swp1.10 + bridge link set dev $swp1 locked on + + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_fail $? "managed to configure a VLAN upper on a locked port" + + log_test "bridge locked port" + + ip link del dev $swp1.10 &> /dev/null + ip link del dev br1 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a -- cgit v1.2.3-58-ga151 From c302378bc157f6a73b6cae4ca67f5f6aa931dcec Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 9 Nov 2022 16:26:09 +0200 Subject: libbpf: Hashmap interface update to allow both long and void* keys/values An update for libbpf's hashmap interface from void* -> void* to a polymorphic one, allowing both long and void* keys and values. This simplifies many use cases in libbpf as hashmaps there are mostly integer to integer. Perf copies hashmap implementation from libbpf and has to be updated as well. Changes to libbpf, selftests/bpf and perf are packed as a single commit to avoid compilation issues with any future bisect. 
Polymorphic interface is achieved by hiding hashmap interface functions behind auxiliary macros that take care of necessary type casts, for example: #define hashmap_cast_ptr(p) \ ({ \ _Static_assert((p) == NULL || sizeof(*(p)) == sizeof(long),\ #p " pointee should be a long-sized integer or a pointer"); \ (long *)(p); \ }) bool hashmap_find(const struct hashmap *map, long key, long *value); #define hashmap__find(map, key, value) \ hashmap_find((map), (long)(key), hashmap_cast_ptr(value)) - hashmap__find macro casts key and value parameters to long and long* respectively - hashmap_cast_ptr ensures that value pointer points to memory of the appropriate size. This hack was suggested by Andrii Nakryiko in [1]. This is a follow-up to [2]. [1] https://lore.kernel.org/bpf/CAEf4BzZ8KFneEJxFAaNCCFPGqp20hSpS2aCj76uRk3-qZUH5xg@mail.gmail.com/ [2] https://lore.kernel.org/bpf/af1facf9-7bc8-8a3d-0db4-7b3f333589a2@meta.com/T/#m65b28f1d6d969fcd318b556db6a3ad499a42607d Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221109142611.879983-2-eddyz87@gmail.com --- tools/bpf/bpftool/btf.c | 25 +-- tools/bpf/bpftool/common.c | 10 +- tools/bpf/bpftool/gen.c | 19 +-- tools/bpf/bpftool/link.c | 10 +- tools/bpf/bpftool/main.h | 14 +- tools/bpf/bpftool/map.c | 10 +- tools/bpf/bpftool/pids.c | 16 +- tools/bpf/bpftool/prog.c | 10 +- tools/lib/bpf/btf.c | 41 +++-- tools/lib/bpf/btf_dump.c | 15 +- tools/lib/bpf/hashmap.c | 18 +- tools/lib/bpf/hashmap.h | 90 ++++++---- tools/lib/bpf/libbpf.c | 18 +- tools/lib/bpf/strset.c | 18 +- tools/lib/bpf/usdt.c | 28 ++- tools/perf/tests/expr.c | 28 ++- tools/perf/tests/pmu-events.c | 6 +- tools/perf/util/bpf-loader.c | 11 +- tools/perf/util/evsel.c | 2 +- tools/perf/util/expr.c | 36 ++-- tools/perf/util/hashmap.c | 18 +- tools/perf/util/hashmap.h | 90 ++++++---- tools/perf/util/metricgroup.c | 10 +- tools/perf/util/stat-shadow.c | 2 +- tools/perf/util/stat.c | 9 +- tools/testing/selftests/bpf/prog_tests/hashmap.c | 190 +++++++++++++++------ .../selftests/bpf/prog_tests/kprobe_multi_test.c | 6 +- 27 files changed, 410 insertions(+), 340 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 68a70ac03c80..b87e4a7fd689 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -815,8 +815,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, if (!btf_id) continue; - err = hashmap__append(tab, u32_as_hash_field(btf_id), - u32_as_hash_field(id)); + err = hashmap__append(tab, btf_id, id); if (err) { p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", btf_id, id, strerror(-err)); @@ -875,17 +874,13 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("size %uB", info->btf_size); n = 0; - hashmap__for_each_key_entry(btf_prog_table, entry, - u32_as_hash_field(info->id)) { - printf("%s%u", n++ == 0 ? " prog_ids " : ",", - hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_prog_table, entry, info->id) { + printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value); } n = 0; - hashmap__for_each_key_entry(btf_map_table, entry, - u32_as_hash_field(info->id)) { - printf("%s%u", n++ == 0 ? " map_ids " : ",", - hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_map_table, entry, info->id) { + printf("%s%lu", n++ == 0 ?
" map_ids " : ",", entry->value); } emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); @@ -907,17 +902,15 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_name(json_wtr, "prog_ids"); jsonw_start_array(json_wtr); /* prog_ids */ - hashmap__for_each_key_entry(btf_prog_table, entry, - u32_as_hash_field(info->id)) { - jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_prog_table, entry, info->id) { + jsonw_uint(json_wtr, entry->value); } jsonw_end_array(json_wtr); /* prog_ids */ jsonw_name(json_wtr, "map_ids"); jsonw_start_array(json_wtr); /* map_ids */ - hashmap__for_each_key_entry(btf_map_table, entry, - u32_as_hash_field(info->id)) { - jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); + hashmap__for_each_key_entry(btf_map_table, entry, info->id) { + jsonw_uint(json_wtr, entry->value); } jsonw_end_array(json_wtr); /* map_ids */ diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index e4d33bc8bbbf..d9f7b1299b03 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -494,7 +494,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, goto out_close; } - err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path); + err = hashmap__append(build_fn_table, pinned_info.id, path); if (err) { p_err("failed to append entry to hashmap for ID %u, path '%s': %s", pinned_info.id, path, strerror(errno)); @@ -545,7 +545,7 @@ void delete_pinned_obj_table(struct hashmap *map) return; hashmap__for_each_entry(map, entry, bkt) - free(entry->value); + free(entry->pvalue); hashmap__free(map); } @@ -1041,12 +1041,12 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } -size_t hash_fn_for_key_as_id(const void *key, void *ctx) +size_t hash_fn_for_key_as_id(long key, void *ctx) { - return (size_t)key; + return key; } -bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) +bool equal_fn_for_key_as_id(long k1, long k2, void *ctx) { return k1 == k2; } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index cf8b4e525c88..01bb8d8f5568 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1660,21 +1660,16 @@ struct btfgen_info { struct btf *marked_btf; /* btf structure used to mark used types */ }; -static size_t btfgen_hash_fn(const void *key, void *ctx) +static size_t btfgen_hash_fn(long key, void *ctx) { - return (size_t)key; + return key; } -static bool btfgen_equal_fn(const void *k1, const void *k2, void *ctx) +static bool btfgen_equal_fn(long k1, long k2, void *ctx) { return k1 == k2; } -static void *u32_as_hash_key(__u32 x) -{ - return (void *)(uintptr_t)x; -} - static void btfgen_free_info(struct btfgen_info *info) { if (!info) @@ -2086,18 +2081,18 @@ static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path) struct bpf_core_spec specs_scratch[3] = {}; struct bpf_core_relo_res targ_res = {}; struct bpf_core_cand_list *cands = NULL; - const void *type_key = u32_as_hash_key(relo->type_id); const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off); if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && - !hashmap__find(cand_cache, type_key, (void **)&cands)) { + !hashmap__find(cand_cache, relo->type_id, &cands)) { cands = btfgen_find_cands(btf, info->src_btf, relo->type_id); if (!cands) { err = -errno; goto out; } - err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + err = hashmap__set(cand_cache, relo->type_id, cands, + NULL, NULL); if (err) goto out; } @@ -2120,7 +2115,7 
@@ out: if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); + bpf_core_free_cands(entry->pvalue); } hashmap__free(cand_cache); } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2863639706dd..6f4cfe01cad4 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -204,9 +204,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(link_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hashmap__for_each_key_entry(link_table, entry, info->id) + jsonw_string(json_wtr, entry->pvalue); jsonw_end_array(json_wtr); } @@ -309,9 +308,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (!hashmap__empty(link_table)) { struct hashmap_entry *entry; - hashmap__for_each_key_entry(link_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hashmap__for_each_key_entry(link_table, entry, info->id) + printf("\n\tpinned %s", (char *)entry->pvalue); } emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 467d8472df0c..d4e8a1aef787 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -240,8 +240,8 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, int print_all_levels(__maybe_unused enum libbpf_print_level level, const char *format, va_list args); -size_t hash_fn_for_key_as_id(const void *key, void *ctx); -bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); +size_t hash_fn_for_key_as_id(long key, void *ctx); +bool equal_fn_for_key_as_id(long k1, long k2, void *ctx); /* bpf_attach_type_input_str - convert the provided attach type value into a * textual representation that we accept for input purposes. @@ -257,16 +257,6 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); */ const char *bpf_attach_type_input_str(enum bpf_attach_type t); -static inline void *u32_as_hash_field(__u32 x) -{ - return (void *)(uintptr_t)x; -} - -static inline __u32 hash_field_as_u32(const void *x) -{ - return (__u32)(uintptr_t)x; -} - static inline bool hashmap__empty(struct hashmap *map) { return map ? 
hashmap__size(map) == 0 : true; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index f941ac5c7b73..d884070a2314 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -518,9 +518,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(map_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hashmap__for_each_key_entry(map_table, entry, info->id) + jsonw_string(json_wtr, entry->pvalue); jsonw_end_array(json_wtr); } @@ -595,9 +594,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (!hashmap__empty(map_table)) { struct hashmap_entry *entry; - hashmap__for_each_key_entry(map_table, entry, - u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hashmap__for_each_key_entry(map_table, entry, info->id) + printf("\n\tpinned %s", (char *)entry->pvalue); } if (frozen_str) { diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index bb6c969a114a..00c77edb6331 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -36,8 +36,8 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) int err, i; void *tmp; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) { - refs = entry->value; + hashmap__for_each_key_entry(map, entry, e->id) { + refs = entry->pvalue; for (i = 0; i < refs->ref_cnt; i++) { if (refs->refs[i].pid == e->pid) @@ -81,7 +81,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) refs->has_bpf_cookie = e->has_bpf_cookie; refs->bpf_cookie = e->bpf_cookie; - err = hashmap__append(map, u32_as_hash_field(e->id), refs); + err = hashmap__append(map, e->id, refs); if (err) p_err("failed to append entry to hashmap for ID %u: %s", e->id, strerror(errno)); @@ -183,7 +183,7 @@ void delete_obj_refs_table(struct hashmap *map) return; hashmap__for_each_entry(map, entry, bkt) { - struct obj_refs *refs = entry->value; + struct obj_refs *refs = entry->pvalue; free(refs->refs); free(refs); @@ -200,8 +200,8 @@ void emit_obj_refs_json(struct hashmap *map, __u32 id, if (hashmap__empty(map)) return; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { - struct obj_refs *refs = entry->value; + hashmap__for_each_key_entry(map, entry, id) { + struct obj_refs *refs = entry->pvalue; int i; if (refs->ref_cnt == 0) @@ -232,8 +232,8 @@ void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) if (hashmap__empty(map)) return; - hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { - struct obj_refs *refs = entry->value; + hashmap__for_each_key_entry(map, entry, id) { + struct obj_refs *refs = entry->pvalue; int i; if (refs->ref_cnt == 0) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a858b907da16..9d32ffb9f22e 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -486,9 +486,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hashmap__for_each_key_entry(prog_table, entry, - u32_as_hash_field(info->id)) - jsonw_string(json_wtr, entry->value); + hashmap__for_each_key_entry(prog_table, entry, info->id) + jsonw_string(json_wtr, entry->pvalue); jsonw_end_array(json_wtr); } @@ -561,9 +560,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (!hashmap__empty(prog_table)) { struct hashmap_entry *entry; - hashmap__for_each_key_entry(prog_table, entry, - 
u32_as_hash_field(info->id)) - printf("\n\tpinned %s", (char *)entry->value); + hashmap__for_each_key_entry(prog_table, entry, info->id) + printf("\n\tpinned %s", (char *)entry->pvalue); } if (info->btf_id) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 71d68bf7788c..442d4d0f98b8 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1559,15 +1559,15 @@ struct btf_pipe { static int btf_rewrite_str(__u32 *str_off, void *ctx) { struct btf_pipe *p = ctx; - void *mapped_off; + long mapped_off; int off, err; if (!*str_off) /* nothing to do for empty strings */ return 0; if (p->str_off_map && - hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { - *str_off = (__u32)(long)mapped_off; + hashmap__find(p->str_off_map, *str_off, &mapped_off)) { + *str_off = mapped_off; return 0; } @@ -1579,7 +1579,7 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx) * performing expensive string comparisons. */ if (p->str_off_map) { - err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); + err = hashmap__append(p->str_off_map, *str_off, off); if (err) return err; } @@ -1630,8 +1630,8 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) return 0; } -static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); -static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); +static size_t btf_dedup_identity_hash_fn(long key, void *ctx); +static bool btf_dedup_equal_fn(long k1, long k2, void *ctx); int btf__add_btf(struct btf *btf, const struct btf *src_btf) { @@ -3126,12 +3126,11 @@ static long hash_combine(long h, long value) } #define for_each_dedup_cand(d, node, hash) \ - hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) + hashmap__for_each_key_entry(d->dedup_table, node, hash) static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) { - return hashmap__append(d->dedup_table, - (void *)hash, (void *)(long)type_id); + return hashmap__append(d->dedup_table, hash, type_id); } static int btf_dedup_hypot_map_add(struct btf_dedup *d, @@ -3178,17 +3177,17 @@ static void btf_dedup_free(struct btf_dedup *d) free(d); } -static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) +static size_t btf_dedup_identity_hash_fn(long key, void *ctx) { - return (size_t)key; + return key; } -static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) +static size_t btf_dedup_collision_hash_fn(long key, void *ctx) { return 0; } -static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) +static bool btf_dedup_equal_fn(long k1, long k2, void *ctx) { return k1 == k2; } @@ -3750,7 +3749,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_INT: h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int_tag(t, cand)) { new_id = cand_id; @@ -3763,7 +3762,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_ENUM64: h = btf_hash_enum(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_enum(t, cand)) { new_id = cand_id; @@ -3785,7 +3784,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FLOAT: h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = 
hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; @@ -4288,7 +4287,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_struct(t); for_each_dedup_cand(d, hash_entry, h) { - __u32 cand_id = (__u32)(long)hash_entry->value; + __u32 cand_id = hash_entry->value; int eq; /* @@ -4393,7 +4392,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; @@ -4410,7 +4409,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int_tag(t, cand)) { new_id = cand_id; @@ -4434,7 +4433,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_array(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_array(t, cand)) { new_id = cand_id; @@ -4466,7 +4465,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_fnproto(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_fnproto(t, cand)) { new_id = cand_id; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index bf0cc0e986dd..12f7039e0ab2 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -117,14 +117,14 @@ struct btf_dump { struct btf_dump_data *typed_dump; }; -static size_t str_hash_fn(const void *key, void *ctx) +static size_t str_hash_fn(long key, void *ctx) { - return str_hash(key); + return str_hash((void *)key); } -static bool str_equal_fn(const void *a, const void *b, void *ctx) +static bool str_equal_fn(long a, long b, void *ctx) { - return strcmp(a, b) == 0; + return strcmp((void *)a, (void *)b) == 0; } static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) @@ -225,7 +225,7 @@ static void btf_dump_free_names(struct hashmap *map) struct hashmap_entry *cur; hashmap__for_each_entry(map, cur, bkt) - free((void *)cur->key); + free((void *)cur->pkey); hashmap__free(map); } @@ -1543,11 +1543,10 @@ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, if (!new_name) return 1; - hashmap__find(name_map, orig_name, (void **)&dup_cnt); + hashmap__find(name_map, orig_name, &dup_cnt); dup_cnt++; - err = hashmap__set(name_map, new_name, (void *)dup_cnt, - (const void **)&old_name, NULL); + err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL); if (err) free(new_name); diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index aeb09c288716..140ee4055676 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map) } static bool hashmap_find_entry(const struct hashmap *map, - const void *key, size_t hash, + const long key, size_t hash, struct hashmap_entry ***pprev, struct hashmap_entry **entry) { @@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map, return false; } -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum 
hashmap_insert_strategy strategy, - const void **old_key, void **old_value) +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value) { struct hashmap_entry *entry; size_t h; int err; if (old_key) - *old_key = NULL; + *old_key = 0; if (old_value) - *old_value = NULL; + *old_value = 0; h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); if (strategy != HASHMAP_APPEND && @@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value, return 0; } -bool hashmap__find(const struct hashmap *map, const void *key, void **value) +bool hashmap_find(const struct hashmap *map, long key, long *value) { struct hashmap_entry *entry; size_t h; @@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value) return true; } -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value) +bool hashmap_delete(struct hashmap *map, long key, + long *old_key, long *old_value) { struct hashmap_entry **pprev, *entry; size_t h; diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index 10a4c4cd13cf..3fe647477bad 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s) return h; } -typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); +typedef size_t (*hashmap_hash_fn)(long key, void *ctx); +typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx); +/* + * Hashmap interface is polymorphic, keys and values could be either + * long-sized integers or pointers, this is achieved as follows: + * - interface functions that operate on keys and values are hidden + * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert; + * - these auxiliary macros cast the key and value parameters as + * long or long *, so the user does not have to specify the casts explicitly; + * - for pointer parameters (e.g. old_key) the size of the pointed + * type is verified by hashmap_cast_ptr using _Static_assert; + * - when iterating using hashmap__for_each_* forms + * hashmap_entry->key should be used for integer keys and + * hashmap_entry->pkey should be used for pointer keys, + * same goes for values. + */ struct hashmap_entry { - const void *key; - void *value; + union { + long key; + const void *pkey; + }; + union { + long value; + void *pvalue; + }; struct hashmap_entry *next; }; @@ -102,6 +122,12 @@ enum hashmap_insert_strategy { HASHMAP_APPEND, }; +#define hashmap_cast_ptr(p) ({ \ + _Static_assert((p) == NULL || sizeof(*(p)) == sizeof(long), \ + #p " pointee should be a long-sized integer or a pointer"); \ + (long *)(p); \ +}) + /* * hashmap__insert() adds key/value entry w/ various semantics, depending on * provided strategy value. If a given key/value pair replaced already @@ -109,42 +135,38 @@ enum hashmap_insert_strategy { * through old_key and old_value to allow calling code do proper memory * management.
*/ -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value); +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value); -static inline int hashmap__add(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); -} +#define hashmap__insert(map, key, value, strategy, old_key, old_value) \ + hashmap_insert((map), (long)(key), (long)(value), (strategy), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -static inline int hashmap__set(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_SET, - old_key, old_value); -} +#define hashmap__add(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL) -static inline int hashmap__update(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_UPDATE, - old_key, old_value); -} +#define hashmap__set(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value)) -static inline int hashmap__append(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); -} +#define hashmap__update(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value)) + +#define hashmap__append(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL) + +bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value); + +#define hashmap__delete(map, key, old_key, old_value) \ + hashmap_delete((map), (long)(key), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value); +bool hashmap_find(const struct hashmap *map, long key, long *value); -bool hashmap__find(const struct hashmap *map, const void *key, void **value); +#define hashmap__find(map, key, value) \ + hashmap_find((map), (long)(key), hashmap_cast_ptr(value)) /* * hashmap__for_each_entry - iterate over all entries in hashmap diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d7819edf074..1d263885d635 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5601,21 +5601,16 @@ int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); } -static size_t bpf_core_hash_fn(const void *key, void *ctx) +static size_t bpf_core_hash_fn(const long key, void *ctx) { - return (size_t)key; + return key; } -static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) { return k1 == k2; } -static void *u32_as_hash_key(__u32 x) -{ - return (void *)(uintptr_t)x; -} - static int record_relo_core(struct bpf_program *prog, const struct bpf_core_relo *core_relo, int insn_idx) { @@ -5658,7 +5653,6 @@ static int bpf_core_resolve_relo(struct bpf_program *prog, struct bpf_core_relo_res *targ_res) { struct bpf_core_spec specs_scratch[3] = {}; - const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = NULL; const char 
*prog_name = prog->name; const struct btf_type *local_type; @@ -5675,7 +5669,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog, return -EINVAL; if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && - !hashmap__find(cand_cache, type_key, (void **)&cands)) { + !hashmap__find(cand_cache, local_id, &cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", @@ -5683,7 +5677,7 @@ static int bpf_core_resolve_relo(struct bpf_program *prog, local_name, PTR_ERR(cands)); return PTR_ERR(cands); } - err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); if (err) { bpf_core_free_cands(cands); return err; @@ -5806,7 +5800,7 @@ out: if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); + bpf_core_free_cands(entry->pvalue); } hashmap__free(cand_cache); } diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index ea655318153f..2464bcbd04e0 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -19,19 +19,19 @@ struct strset { struct hashmap *strs_hash; }; -static size_t strset_hash_fn(const void *key, void *ctx) +static size_t strset_hash_fn(long key, void *ctx) { const struct strset *s = ctx; - const char *str = s->strs_data + (long)key; + const char *str = s->strs_data + key; return str_hash(str); } -static bool strset_equal_fn(const void *key1, const void *key2, void *ctx) +static bool strset_equal_fn(long key1, long key2, void *ctx) { const struct strset *s = ctx; - const char *str1 = s->strs_data + (long)key1; - const char *str2 = s->strs_data + (long)key2; + const char *str1 = s->strs_data + key1; + const char *str2 = s->strs_data + key2; return strcmp(str1, str2) == 0; } @@ -67,7 +67,7 @@ struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t ini /* hashmap__add() returns EEXIST if string with the same * content already is in the hash map */ - err = hashmap__add(hash, (void *)off, (void *)off); + err = hashmap__add(hash, off, off); if (err == -EEXIST) continue; /* duplicate */ if (err) @@ -127,7 +127,7 @@ int strset__find_str(struct strset *set, const char *s) new_off = set->strs_data_len; memcpy(p, s, len); - if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off)) + if (hashmap__find(set->strs_hash, new_off, &old_off)) return old_off; return -ENOENT; @@ -165,8 +165,8 @@ int strset__add_str(struct strset *set, const char *s) * contents doesn't exist already (HASHMAP_ADD strategy). If such * string exists, we'll get its offset in old_off (that's old_key). 
*/ - err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off, - HASHMAP_ADD, (const void **)&old_off, NULL); + err = hashmap__insert(set->strs_hash, new_off, new_off, + HASHMAP_ADD, &old_off, NULL); if (err == -EEXIST) return old_off; /* duplicated string, return existing offset */ if (err) diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 28fa1b2283de..b8daae265f99 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -873,31 +873,27 @@ static void bpf_link_usdt_dealloc(struct bpf_link *link) free(usdt_link); } -static size_t specs_hash_fn(const void *key, void *ctx) +static size_t specs_hash_fn(long key, void *ctx) { - const char *s = key; - - return str_hash(s); + return str_hash((char *)key); } -static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) +static bool specs_equal_fn(long key1, long key2, void *ctx) { - const char *s1 = key1; - const char *s2 = key2; - - return strcmp(s1, s2) == 0; + return strcmp((char *)key1, (char *)key2) == 0; } static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, struct bpf_link_usdt *link, struct usdt_target *target, int *spec_id, bool *is_new) { - void *tmp; + long tmp; + void *new_ids; int err; /* check if we already allocated spec ID for this spec string */ if (hashmap__find(specs_hash, target->spec_str, &tmp)) { - *spec_id = (long)tmp; + *spec_id = tmp; *is_new = false; return 0; } @@ -905,17 +901,17 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash /* otherwise it's a new ID that needs to be set up in specs map and * returned back to usdt_manager when USDT link is detached */ - tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); - if (!tmp) + new_ids = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!new_ids) return -ENOMEM; - link->spec_ids = tmp; + link->spec_ids = new_ids; /* get next free spec ID, giving preference to free list, if not empty */ if (man->free_spec_cnt) { *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; /* cache spec ID for current spec string for future lookups */ - err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + err = hashmap__add(specs_hash, target->spec_str, *spec_id); if (err) return err; @@ -928,7 +924,7 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash *spec_id = man->next_free_spec_id; /* cache spec ID for current spec string for future lookups */ - err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + err = hashmap__add(specs_hash, target->spec_str, *spec_id); if (err) return err; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 6512f5e22045..c598f95aebf3 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -130,12 +130,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO", ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", - (void **)&val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", &val_ptr)); 
expr__ctx_clear(ctx); ctx->sctx.runtime = 3; @@ -143,20 +140,16 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", - (void **)&val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", &val_ptr)); expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", expr__find_ids("dash\\-event1 - dash\\-event2", NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", - (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", - (void **)&val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1", &val_ptr)); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2", &val_ptr)); /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */ { @@ -174,7 +167,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, smton ? "EVENT1" : "EVENT2", - (void **)&val_ptr)); + &val_ptr)); expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", @@ -183,7 +176,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, corewide ? "EVENT1" : "EVENT2", - (void **)&val_ptr)); + &val_ptr)); } /* The expression is a constant 1.0 without needing to evaluate EVENT1. 
*/ @@ -220,8 +213,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u expr__find_ids("source_count(EVENT1)", NULL, ctx) == 0); TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1); - TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", - (void **)&val_ptr)); + TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1", &val_ptr)); expr__ctx_free(ctx); diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 097e05c796ab..3c2ee55e75c7 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -986,10 +986,10 @@ static int metric_parse_fake(const char *str) */ i = 1; hashmap__for_each_entry(ctx->ids, cur, bkt) - expr__add_id_val(ctx, strdup(cur->key), i++); + expr__add_id_val(ctx, strdup(cur->pkey), i++); hashmap__for_each_entry(ctx->ids, cur, bkt) { - if (check_parse_fake(cur->key)) { + if (check_parse_fake(cur->pkey)) { pr_err("check_parse_fake failed\n"); goto out; } @@ -1003,7 +1003,7 @@ static int metric_parse_fake(const char *str) */ i = 1024; hashmap__for_each_entry(ctx->ids, cur, bkt) - expr__add_id_val(ctx, strdup(cur->key), i--); + expr__add_id_val(ctx, strdup(cur->pkey), i--); if (expr__parse(&result, ctx, str)) { pr_err("expr__parse failed\n"); ret = -1; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f4adeccdbbcb..a5dbd71cb9ab 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -318,7 +318,7 @@ static void bpf_program_hash_free(void) return; hashmap__for_each_entry(bpf_program_hash, cur, bkt) - clear_prog_priv(cur->key, cur->value); + clear_prog_priv(cur->pkey, cur->pvalue); hashmap__free(bpf_program_hash); bpf_program_hash = NULL; @@ -339,13 +339,12 @@ void bpf__clear(void) bpf_map_hash_free(); } -static size_t ptr_hash(const void *__key, void *ctx __maybe_unused) +static size_t ptr_hash(const long __key, void *ctx __maybe_unused) { - return (size_t) __key; + return __key; } -static bool ptr_equal(const void *key1, const void *key2, - void *ctx __maybe_unused) +static bool ptr_equal(long key1, long key2, void *ctx __maybe_unused) { return key1 == key2; } @@ -1185,7 +1184,7 @@ static void bpf_map_hash_free(void) return; hashmap__for_each_entry(bpf_map_hash, cur, bkt) - bpf_map_priv__clear(cur->key, cur->value); + bpf_map_priv__clear(cur->pkey, cur->pvalue); hashmap__free(bpf_map_hash); bpf_map_hash = NULL; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde3507..9e8a1294c981 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3123,7 +3123,7 @@ void evsel__zero_per_pkg(struct evsel *evsel) if (evsel->per_pkg_mask) { hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt) - free((char *)cur->key); + free((void *)cur->pkey); hashmap__clear(evsel->per_pkg_mask); } diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index aaacf514dc09..2f05ecdcfe9a 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -46,7 +46,7 @@ struct expr_id_data { } kind; }; -static size_t key_hash(const void *key, void *ctx __maybe_unused) +static size_t key_hash(long key, void *ctx __maybe_unused) { const char *str = (const char *)key; size_t hash = 0; @@ -59,8 +59,7 @@ static size_t key_hash(const void *key, void *ctx __maybe_unused) return hash; } -static bool key_equal(const void *key1, const void *key2, - void *ctx __maybe_unused) +static bool key_equal(long key1, long key2, void *ctx __maybe_unused) { return !strcmp((const char *)key1, (const char *)key2); } @@ -84,8 +83,8 
@@ void ids__free(struct hashmap *ids) return; hashmap__for_each_entry(ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); + free((void *)cur->pkey); + free((void *)cur->pvalue); } hashmap__free(ids); @@ -97,8 +96,7 @@ int ids__insert(struct hashmap *ids, const char *id) char *old_key = NULL; int ret; - ret = hashmap__set(ids, id, data_ptr, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ids, id, data_ptr, &old_key, &old_data); if (ret) free(data_ptr); free(old_key); @@ -127,8 +125,7 @@ struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) ids2 = tmp; } hashmap__for_each_entry(ids2, cur, bkt) { - ret = hashmap__set(ids1, cur->key, cur->value, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ids1, cur->key, cur->value, &old_key, &old_data); free(old_key); free(old_data); @@ -169,8 +166,7 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, data_ptr->val.source_count = source_count; data_ptr->kind = EXPR_ID_DATA__VALUE; - ret = hashmap__set(ctx->ids, id, data_ptr, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data); if (ret) free(data_ptr); free(old_key); @@ -205,8 +201,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) data_ptr->ref.metric_expr = ref->metric_expr; data_ptr->kind = EXPR_ID_DATA__REF; - ret = hashmap__set(ctx->ids, name, data_ptr, - (const void **)&old_key, (void **)&old_data); + ret = hashmap__set(ctx->ids, name, data_ptr, &old_key, &old_data); if (ret) free(data_ptr); @@ -221,7 +216,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { - return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1; + return hashmap__find(ctx->ids, id, data) ? 
0 : -1; } bool expr__subset_of_ids(struct expr_parse_ctx *haystack, @@ -232,7 +227,7 @@ bool expr__subset_of_ids(struct expr_parse_ctx *haystack, struct expr_id_data *data; hashmap__for_each_entry(needles->ids, cur, bkt) { - if (expr__get_id(haystack, cur->key, &data)) + if (expr__get_id(haystack, cur->pkey, &data)) return false; } return true; @@ -282,8 +277,7 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id) struct expr_id_data *old_val = NULL; char *old_key = NULL; - hashmap__delete(ctx->ids, id, - (const void **)&old_key, (void **)&old_val); + hashmap__delete(ctx->ids, id, &old_key, &old_val); free(old_key); free(old_val); } @@ -314,8 +308,8 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx) size_t bkt; hashmap__for_each_entry(ctx->ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); + free((void *)cur->pkey); + free(cur->pvalue); } hashmap__clear(ctx->ids); } @@ -330,8 +324,8 @@ void expr__ctx_free(struct expr_parse_ctx *ctx) free(ctx->sctx.user_requested_cpu_list); hashmap__for_each_entry(ctx->ids, cur, bkt) { - free((char *)cur->key); - free(cur->value); + free((void *)cur->pkey); + free(cur->pvalue); } hashmap__free(ctx->ids); free(ctx); diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c index aeb09c288716..140ee4055676 100644 --- a/tools/perf/util/hashmap.c +++ b/tools/perf/util/hashmap.c @@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map) } static bool hashmap_find_entry(const struct hashmap *map, - const void *key, size_t hash, + const long key, size_t hash, struct hashmap_entry ***pprev, struct hashmap_entry **entry) { @@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map, return false; } -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value) +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value) { struct hashmap_entry *entry; size_t h; int err; if (old_key) - *old_key = NULL; + *old_key = 0; if (old_value) - *old_value = NULL; + *old_value = 0; h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); if (strategy != HASHMAP_APPEND && @@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value, return 0; } -bool hashmap__find(const struct hashmap *map, const void *key, void **value) +bool hashmap_find(const struct hashmap *map, long key, long *value) { struct hashmap_entry *entry; size_t h; @@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value) return true; } -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value) +bool hashmap_delete(struct hashmap *map, long key, + long *old_key, long *old_value) { struct hashmap_entry **pprev, *entry; size_t h; diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index 10a4c4cd13cf..3fe647477bad 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s) return h; } -typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); +typedef size_t (*hashmap_hash_fn)(long key, void *ctx); +typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx); +/* + * Hashmap interface is polymorphic, keys and values could be either + * long-sized integers or pointers, this is achieved as follows: + * - 
interface functions that operate on keys and values are hidden + * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert; + * - these auxiliary macros cast the key and value parameters as + * long or long *, so the user does not have to specify the casts explicitly; + * - for pointer parameters (e.g. old_key) the size of the pointed + * type is verified by hashmap_cast_ptr using _Static_assert; + * - when iterating using hashmap__for_each_* forms + * hashmap_entry->key should be used for integer keys and + * hashmap_entry->pkey should be used for pointer keys, + * same goes for values. + */ struct hashmap_entry { - const void *key; - void *value; + union { + long key; + const void *pkey; + }; + union { + long value; + void *pvalue; + }; struct hashmap_entry *next; }; @@ -102,6 +122,12 @@ enum hashmap_insert_strategy { HASHMAP_APPEND, }; +#define hashmap_cast_ptr(p) ({ \ + _Static_assert((p) == NULL || sizeof(*(p)) == sizeof(long), \ + #p " pointee should be a long-sized integer or a pointer"); \ + (long *)(p); \ +}) + /* * hashmap__insert() adds key/value entry w/ various semantics, depending on * provided strategy value. If a given key/value pair replaced already @@ -109,42 +135,38 @@ enum hashmap_insert_strategy { * through old_key and old_value to allow calling code do proper memory * management. */ -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value); +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value); -static inline int hashmap__add(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); -} +#define hashmap__insert(map, key, value, strategy, old_key, old_value) \ + hashmap_insert((map), (long)(key), (long)(value), (strategy), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -static inline int hashmap__set(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_SET, - old_key, old_value); -} +#define hashmap__add(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL) -static inline int hashmap__update(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_UPDATE, - old_key, old_value); -} +#define hashmap__set(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value)) -static inline int hashmap__append(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); -} +#define hashmap__update(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value)) + +#define hashmap__append(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL) + +bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value); + +#define hashmap__delete(map, key, old_key, old_value) \ + hashmap_delete((map), (long)(key), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value); +bool hashmap_find(const struct hashmap *map, long key, long *value); -bool hashmap__find(const struct hashmap *map, 
const void *key, void **value); +#define hashmap__find(map, key, value) \ + hashmap_find((map), (long)(key), hashmap_cast_ptr(value)) /* * hashmap__for_each_entry - iterate over all entries in hashmap diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4c98ac29ee13..6b3505b1b6ac 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -288,7 +288,7 @@ static int setup_metric_events(struct hashmap *ids, * combined or shared groups, this metric may not care * about this event. */ - if (hashmap__find(ids, metric_id, (void **)&val_ptr)) { + if (hashmap__find(ids, metric_id, &val_ptr)) { metric_events[matched_events++] = ev; if (matched_events >= ids_size) @@ -764,7 +764,7 @@ static int metricgroup__build_event_string(struct strbuf *events, #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *sep, *rsep, *id = cur->key; + const char *sep, *rsep, *id = cur->pkey; enum perf_tool_event ev; pr_debug("found event %s\n", id); @@ -945,14 +945,14 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_event pe; - if (metricgroup__find_metric(cur->key, table, &pe)) { + if (metricgroup__find_metric(cur->pkey, table, &pe)) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) return -ENOMEM; memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe)); - pending[pending_cnt].key = cur->key; + pending[pending_cnt].key = cur->pkey; pending_cnt++; } } @@ -1433,7 +1433,7 @@ static int build_combined_expr_ctx(const struct list_head *metric_list, list_for_each_entry(m, metric_list, nd) { if (m->has_constraint && !m->modifier) { hashmap__for_each_entry(m->pctx->ids, cur, bkt) { - dup = strdup(cur->key); + dup = strdup(cur->pkey); if (!dup) { ret = -ENOMEM; goto err_out; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07b29fe272c7..0bf71b02aa06 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -398,7 +398,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) i = 0; hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *metric_name = (const char *)cur->key; + const char *metric_name = cur->pkey; found = false; if (leader) { diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8ec8bb4a9912..c0656f85bfa5 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,15 +278,14 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) } } -static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) +static size_t pkg_id_hash(long __key, void *ctx __maybe_unused) { uint64_t *key = (uint64_t *) __key; return *key & 0xffffffff; } -static bool pkg_id_equal(const void *__key1, const void *__key2, - void *ctx __maybe_unused) +static bool pkg_id_equal(long __key1, long __key2, void *ctx __maybe_unused) { uint64_t *key1 = (uint64_t *) __key1; uint64_t *key2 = (uint64_t *) __key2; @@ -347,11 +346,11 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, return -ENOMEM; *key = (uint64_t)d << 32 | s; - if (hashmap__find(mask, (void *)key, NULL)) { + if (hashmap__find(mask, key, NULL)) { *skip = true; free(key); } else - ret = hashmap__add(mask, (void *)key, (void *)1); + ret = hashmap__add(mask, key, 1); return ret; } diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c index 
4747ab18f97f..d358a223fd2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/hashmap.c +++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c @@ -7,17 +7,18 @@ */ #include "test_progs.h" #include "bpf/hashmap.h" +#include static int duration = 0; -static size_t hash_fn(const void *k, void *ctx) +static size_t hash_fn(long k, void *ctx) { - return (long)k; + return k; } -static bool equal_fn(const void *a, const void *b, void *ctx) +static bool equal_fn(long a, long b, void *ctx) { - return (long)a == (long)b; + return a == b; } static inline size_t next_pow_2(size_t n) @@ -52,8 +53,8 @@ static void test_hashmap_generic(void) return; for (i = 0; i < ELEM_CNT; i++) { - const void *oldk, *k = (const void *)(long)i; - void *oldv, *v = (void *)(long)(1024 + i); + long oldk, k = i; + long oldv, v = 1024 + i; err = hashmap__update(map, k, v, &oldk, &oldv); if (CHECK(err != -ENOENT, "hashmap__update", @@ -64,20 +65,18 @@ static void test_hashmap_generic(void) err = hashmap__add(map, k, v); } else { err = hashmap__set(map, k, v, &oldk, &oldv); - if (CHECK(oldk != NULL || oldv != NULL, "check_kv", - "unexpected k/v: %p=%p\n", oldk, oldv)) + if (CHECK(oldk != 0 || oldv != 0, "check_kv", + "unexpected k/v: %ld=%ld\n", oldk, oldv)) goto cleanup; } - if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) + if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", k, v, err)) goto cleanup; if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", - "failed to find key %ld\n", (long)k)) + "failed to find key %ld\n", k)) goto cleanup; - if (CHECK(oldv != v, "elem_val", - "found value is wrong: %ld\n", (long)oldv)) + if (CHECK(oldv != v, "elem_val", "found value is wrong: %ld\n", oldv)) goto cleanup; } @@ -91,8 +90,8 @@ static void test_hashmap_generic(void) found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { - long k = (long)entry->key; - long v = (long)entry->value; + long k = entry->key; + long v = entry->value; found_msk |= 1ULL << k; if (CHECK(v - k != 1024, "check_kv", @@ -104,8 +103,8 @@ static void test_hashmap_generic(void) goto cleanup; for (i = 0; i < ELEM_CNT; i++) { - const void *oldk, *k = (const void *)(long)i; - void *oldv, *v = (void *)(long)(256 + i); + long oldk, k = i; + long oldv, v = 256 + i; err = hashmap__add(map, k, v); if (CHECK(err != -EEXIST, "hashmap__add", @@ -119,13 +118,13 @@ static void test_hashmap_generic(void) if (CHECK(err, "elem_upd", "failed to update k/v %ld = %ld: %d\n", - (long)k, (long)v, err)) + k, v, err)) goto cleanup; if (CHECK(!hashmap__find(map, k, &oldv), "elem_find", - "failed to find key %ld\n", (long)k)) + "failed to find key %ld\n", k)) goto cleanup; if (CHECK(oldv != v, "elem_val", - "found value is wrong: %ld\n", (long)oldv)) + "found value is wrong: %ld\n", oldv)) goto cleanup; } @@ -139,8 +138,8 @@ static void test_hashmap_generic(void) found_msk = 0; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { - long k = (long)entry->key; - long v = (long)entry->value; + long k = entry->key; + long v = entry->value; found_msk |= 1ULL << k; if (CHECK(v - k != 256, "elem_check", @@ -152,7 +151,7 @@ static void test_hashmap_generic(void) goto cleanup; found_cnt = 0; - hashmap__for_each_key_entry(map, entry, (void *)0) { + hashmap__for_each_key_entry(map, entry, 0) { found_cnt++; } if (CHECK(!found_cnt, "found_cnt", @@ -161,27 +160,25 @@ static void test_hashmap_generic(void) found_msk = 0; found_cnt = 0; - hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) { - const void *oldk, *k; - void *oldv, *v; + 
hashmap__for_each_key_entry_safe(map, entry, tmp, 0) { + long oldk, k; + long oldv, v; k = entry->key; v = entry->value; found_cnt++; - found_msk |= 1ULL << (long)k; + found_msk |= 1ULL << k; if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "failed to delete k/v %ld = %ld\n", - (long)k, (long)v)) + "failed to delete k/v %ld = %ld\n", k, v)) goto cleanup; if (CHECK(oldk != k || oldv != v, "check_old", "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n", - (long)k, (long)v, (long)oldk, (long)oldv)) + k, v, oldk, oldv)) goto cleanup; if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "unexpectedly deleted k/v %ld = %ld\n", - (long)oldk, (long)oldv)) + "unexpectedly deleted k/v %ld = %ld\n", oldk, oldv)) goto cleanup; } @@ -198,26 +195,24 @@ static void test_hashmap_generic(void) goto cleanup; hashmap__for_each_entry_safe(map, entry, tmp, bkt) { - const void *oldk, *k; - void *oldv, *v; + long oldk, k; + long oldv, v; k = entry->key; v = entry->value; found_cnt++; - found_msk |= 1ULL << (long)k; + found_msk |= 1ULL << k; if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "failed to delete k/v %ld = %ld\n", - (long)k, (long)v)) + "failed to delete k/v %ld = %ld\n", k, v)) goto cleanup; if (CHECK(oldk != k || oldv != v, "elem_check", "invalid old k/v: expect %ld = %ld, got %ld = %ld\n", - (long)k, (long)v, (long)oldk, (long)oldv)) + k, v, oldk, oldv)) goto cleanup; if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del", - "unexpectedly deleted k/v %ld = %ld\n", - (long)k, (long)v)) + "unexpectedly deleted k/v %ld = %ld\n", k, v)) goto cleanup; } @@ -235,7 +230,7 @@ static void test_hashmap_generic(void) hashmap__for_each_entry(map, entry, bkt) { CHECK(false, "elem_exists", "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); + entry->key, entry->value); goto cleanup; } @@ -243,22 +238,107 @@ static void test_hashmap_generic(void) hashmap__for_each_entry(map, entry, bkt) { CHECK(false, "elem_exists", "unexpected map entries left: %ld = %ld\n", - (long)entry->key, (long)entry->value); + entry->key, entry->value); + goto cleanup; + } + +cleanup: + hashmap__free(map); +} + +static size_t str_hash_fn(long a, void *ctx) +{ + return str_hash((char *)a); +} + +static bool str_equal_fn(long a, long b, void *ctx) +{ + return strcmp((char *)a, (char *)b) == 0; +} + +/* Verify that hashmap interface works with pointer keys and values */ +static void test_hashmap_ptr_iface(void) +{ + const char *key, *value, *old_key, *old_value; + struct hashmap_entry *cur; + struct hashmap *map; + int err, i, bkt; + + map = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (CHECK(!map, "hashmap__new", "can't allocate hashmap\n")) goto cleanup; + +#define CHECK_STR(fn, var, expected) \ + CHECK(strcmp(var, (expected)), (fn), \ + "wrong value of " #var ": '%s' instead of '%s'\n", var, (expected)) + + err = hashmap__insert(map, "a", "apricot", HASHMAP_ADD, NULL, NULL); + if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err)) + goto cleanup; + + err = hashmap__insert(map, "a", "apple", HASHMAP_SET, &old_key, &old_value); + if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__update", old_key, "a"); + CHECK_STR("hashmap__update", old_value, "apricot"); + + err = hashmap__add(map, "b", "banana"); + if (CHECK(err, "hashmap__add", "unexpected error: %d\n", err)) + goto cleanup; + + err = hashmap__set(map, "b", "breadfruit", &old_key, &old_value); + if (CHECK(err, "hashmap__set", 
"unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__set", old_key, "b"); + CHECK_STR("hashmap__set", old_value, "banana"); + + err = hashmap__update(map, "b", "blueberry", &old_key, &old_value); + if (CHECK(err, "hashmap__update", "unexpected error: %d\n", err)) + goto cleanup; + CHECK_STR("hashmap__update", old_key, "b"); + CHECK_STR("hashmap__update", old_value, "breadfruit"); + + err = hashmap__append(map, "c", "cherry"); + if (CHECK(err, "hashmap__append", "unexpected error: %d\n", err)) + goto cleanup; + + if (CHECK(!hashmap__delete(map, "c", &old_key, &old_value), + "hashmap__delete", "expected to have entry for 'c'\n")) + goto cleanup; + CHECK_STR("hashmap__delete", old_key, "c"); + CHECK_STR("hashmap__delete", old_value, "cherry"); + + CHECK(!hashmap__find(map, "b", &value), "hashmap__find", "can't find value for 'b'\n"); + CHECK_STR("hashmap__find", value, "blueberry"); + + if (CHECK(!hashmap__delete(map, "b", NULL, NULL), + "hashmap__delete", "expected to have entry for 'b'\n")) + goto cleanup; + + i = 0; + hashmap__for_each_entry(map, cur, bkt) { + if (CHECK(i != 0, "hashmap__for_each_entry", "too many entries")) + goto cleanup; + key = cur->pkey; + value = cur->pvalue; + CHECK_STR("entry", key, "a"); + CHECK_STR("entry", value, "apple"); + i++; } +#undef CHECK_STR cleanup: hashmap__free(map); } -static size_t collision_hash_fn(const void *k, void *ctx) +static size_t collision_hash_fn(long k, void *ctx) { return 0; } static void test_hashmap_multimap(void) { - void *k1 = (void *)0, *k2 = (void *)1; + long k1 = 0, k2 = 1; struct hashmap_entry *entry; struct hashmap *map; long found_msk; @@ -273,23 +353,23 @@ static void test_hashmap_multimap(void) * [0] -> 1, 2, 4; * [1] -> 8, 16, 32; */ - err = hashmap__append(map, k1, (void *)1); + err = hashmap__append(map, k1, 1); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k1, (void *)2); + err = hashmap__append(map, k1, 2); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k1, (void *)4); + err = hashmap__append(map, k1, 4); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)8); + err = hashmap__append(map, k2, 8); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)16); + err = hashmap__append(map, k2, 16); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; - err = hashmap__append(map, k2, (void *)32); + err = hashmap__append(map, k2, 32); if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err)) goto cleanup; @@ -300,7 +380,7 @@ static void test_hashmap_multimap(void) /* verify global iteration still works and sees all values */ found_msk = 0; hashmap__for_each_entry(map, entry, bkt) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (1 << 6) - 1, "found_msk", "not all keys iterated: %lx\n", found_msk)) @@ -309,7 +389,7 @@ static void test_hashmap_multimap(void) /* iterate values for key 1 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k1) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if (CHECK(found_msk != (1 | 2 | 4), "found_msk", "invalid k1 values: %lx\n", found_msk)) @@ -318,7 +398,7 @@ static void test_hashmap_multimap(void) /* iterate values for key 2 */ found_msk = 0; hashmap__for_each_key_entry(map, entry, k2) { - found_msk |= (long)entry->value; + found_msk |= entry->value; } if 
(CHECK(found_msk != (8 | 16 | 32), "found_msk", "invalid k2 values: %lx\n", found_msk)) @@ -333,7 +413,7 @@ static void test_hashmap_empty() struct hashmap_entry *entry; int bkt; struct hashmap *map; - void *k = (void *)0; + long k = 0; /* force collisions */ map = hashmap__new(hash_fn, equal_fn, NULL); @@ -374,4 +454,6 @@ void test_hashmap() test_hashmap_multimap(); if (test__start_subtest("empty")) test_hashmap_empty(); + if (test__start_subtest("ptr_iface")) + test_hashmap_ptr_iface(); } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 287b3ac40227..eedbf1937fc4 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -312,12 +312,12 @@ static inline __u64 get_time_ns(void) return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; } -static size_t symbol_hash(const void *key, void *ctx __maybe_unused) +static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); } -static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused) +static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) { return strcmp((const char *) key1, (const char *) key2) == 0; } @@ -372,7 +372,7 @@ static int get_syms(char ***symsp, size_t *cntp) sizeof("__ftrace_invalid_address__") - 1)) continue; - err = hashmap__add(map, name, NULL); + err = hashmap__add(map, name, 0); if (err == -EEXIST) continue; if (err) -- cgit v1.2.3-58-ga151 From 082108fd6932772deb7e329f118687b4c03fc6a5 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 9 Nov 2022 16:26:10 +0200 Subject: libbpf: Resolve unambiguous forward declarations Resolve forward declarations that don't take part in type graph comparisons if the declaration name is unambiguous. Example: CU #1: struct foo; // standalone forward declaration struct foo *some_global; CU #2: struct foo { int x; }; struct foo *another_global; The `struct foo` from CU #1 is not a part of any definition that is compared against another definition while `btf_dedup_struct_types` processes structural types. After `btf_dedup_struct_types` the BTF looks as follows: [1] STRUCT 'foo' size=4 vlen=1 ... [2] INT 'int' size=4 ... [3] PTR '(anon)' type_id=1 [4] FWD 'foo' fwd_kind=struct [5] PTR '(anon)' type_id=4 This commit adds a new pass, `btf_dedup_resolve_fwds`, that maps such forward declarations to structs or unions with an identical name when that name is unambiguous. The pass is positioned before `btf_dedup_ref_types` so that types [3] and [5] can be merged as the same type after [1] and [4] are merged. The final result for the example above looks as follows: [1] STRUCT 'foo' size=4 vlen=1 'x' type_id=2 bits_offset=0 [2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED [3] PTR '(anon)' type_id=1 For a defconfig kernel with BTF enabled this removes 63 forward declarations. Examples of removed declarations: `pt_regs`, `in6_addr`. The running time of the `btf__dedup` function is increased by about 3%.
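Purely as an illustration (not part of this patch), the effect can be reproduced through the public libbpf API; a minimal sketch with made-up names and no error handling:

  #include <bpf/btf.h>

  static void fwd_resolve_demo(void)
  {
          struct btf *btf = btf__new_empty();

          /* CU #2: standalone forward declaration plus a pointer to it */
          btf__add_fwd(btf, "foo", BTF_FWD_STRUCT);     /* [1] */
          btf__add_ptr(btf, 1);                         /* [2] */
          /* CU #1: the actual definition */
          btf__add_int(btf, "int", 4, BTF_INT_SIGNED);  /* [3] */
          btf__add_struct(btf, "foo", 4);               /* [4] */
          btf__add_field(btf, "x", 3, 0, 0);

          /* the new pass maps FWD [1] to STRUCT [4], so after
           * compaction the PTR from [2] references the STRUCT */
          btf__dedup(btf, NULL);
          btf__free(btf);
  }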
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221109142611.879983-3-eddyz87@gmail.com --- tools/lib/bpf/btf.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 442d4d0f98b8..3bd812bf88ff 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2881,6 +2881,7 @@ static int btf_dedup_strings(struct btf_dedup *d); static int btf_dedup_prim_types(struct btf_dedup *d); static int btf_dedup_struct_types(struct btf_dedup *d); static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_resolve_fwds(struct btf_dedup *d); static int btf_dedup_compact_types(struct btf_dedup *d); static int btf_dedup_remap_types(struct btf_dedup *d); @@ -2988,15 +2989,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * Algorithm summary * ================= * - * Algorithm completes its work in 6 separate passes: + * Algorithm completes its work in 7 separate passes: * * 1. Strings deduplication. * 2. Primitive types deduplication (int, enum, fwd). * 3. Struct/union types deduplication. - * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * 4. Resolve unambiguous forward declarations. + * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func * protos, and const/volatile/restrict modifiers). - * 5. Types compaction. - * 6. Types remapping. + * 6. Types compaction. + * 7. Types remapping. * * Algorithm determines canonical type descriptor, which is a single * representative type for each truly unique type. This canonical type is the @@ -3060,6 +3062,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) pr_debug("btf_dedup_struct_types failed:%d\n", err); goto done; } + err = btf_dedup_resolve_fwds(d); + if (err < 0) { + pr_debug("btf_dedup_resolve_fwds failed:%d\n", err); + goto done; + } err = btf_dedup_ref_types(d); if (err < 0) { pr_debug("btf_dedup_ref_types failed:%d\n", err); @@ -4501,6 +4508,134 @@ static int btf_dedup_ref_types(struct btf_dedup *d) return 0; } +/* + * Collect a map from type names to type ids for all canonical structs + * and unions. If the same name is shared by several canonical types + * use a special value 0 to indicate this fact. + */ +static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map) +{ + __u32 nr_types = btf__type_cnt(d->btf); + struct btf_type *t; + __u32 type_id; + __u16 kind; + int err; + + /* + * Iterate over base and split module ids in order to get all + * available structs in the map. 
+ */ + for (type_id = 1; type_id < nr_types; ++type_id) { + t = btf_type_by_id(d->btf, type_id); + kind = btf_kind(t); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + continue; + + /* Skip non-canonical types */ + if (type_id != d->map[type_id]) + continue; + + err = hashmap__add(names_map, t->name_off, type_id); + if (err == -EEXIST) + err = hashmap__set(names_map, t->name_off, 0, NULL, NULL); + + if (err) + return err; + } + + return 0; +} + +static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id) +{ + struct btf_type *t = btf_type_by_id(d->btf, type_id); + enum btf_fwd_kind fwd_kind = btf_kflag(t); + __u16 cand_kind, kind = btf_kind(t); + struct btf_type *cand_t; + uintptr_t cand_id; + + if (kind != BTF_KIND_FWD) + return 0; + + /* Skip if this FWD already has a mapping */ + if (type_id != d->map[type_id]) + return 0; + + if (!hashmap__find(names_map, t->name_off, &cand_id)) + return 0; + + /* Zero is a special value indicating that name is not unique */ + if (!cand_id) + return 0; + + cand_t = btf_type_by_id(d->btf, cand_id); + cand_kind = btf_kind(cand_t); + if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) || + (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION)) + return 0; + + d->map[type_id] = cand_id; + + return 0; +} + +/* + * Resolve unambiguous forward declarations. + * + * The lion's share of all FWD declarations is resolved during + * `btf_dedup_struct_types` phase when different type graphs are + * compared against each other. However, if in some compilation unit a + * FWD declaration is not a part of a type graph compared against + * another type graph that declaration's canonical type would not be + * changed. Example: + * + * CU #1: + * + * struct foo; + * struct foo *some_global; + * + * CU #2: + * + * struct foo { int u; }; + * struct foo *another_global; + * + * After `btf_dedup_struct_types` the BTF looks as follows: + * + * [1] STRUCT 'foo' size=4 vlen=1 ... + * [2] INT 'int' size=4 ... + * [3] PTR '(anon)' type_id=1 + * [4] FWD 'foo' fwd_kind=struct + * [5] PTR '(anon)' type_id=4 + * + * This pass assumes that such FWD declarations should be mapped to + * structs or unions with identical name in case if the name is not + * ambiguous. + */ +static int btf_dedup_resolve_fwds(struct btf_dedup *d) +{ + int i, err; + struct hashmap *names_map; + + names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(names_map)) + return PTR_ERR(names_map); + + err = btf_dedup_fill_unique_names_map(d, names_map); + if (err < 0) + goto exit; + + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i); + if (err < 0) + break; + } + +exit: + hashmap__free(names_map); + return err; +} + /* * Compact types. * -- cgit v1.2.3-58-ga151 From 99e18fad5ff79eea2ea9ec7e6d800b286552e005 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 9 Nov 2022 16:26:11 +0200 Subject: selftests/bpf: Tests for btf_dedup_resolve_fwds Tests to verify the following behavior of `btf_dedup_resolve_fwds`: - remapping for struct forward declarations; - remapping for union forward declarations; - no remapping if forward declaration kind does not match similarly named struct or union declaration; - no remapping if forward declaration name is ambiguous; - base ids are considered for fwd resolution in split btf scenario. 
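The split BTF scenario from the last list item can be sketched with the libbpf API roughly as follows (illustrative only, condensed from the actual test):

  #include <bpf/btf.h>

  static void split_fwd_demo(void)
  {
          struct btf *base, *split;

          base = btf__new_empty();
          btf__add_int(base, "int", 4, BTF_INT_SIGNED);  /* [1] */
          btf__add_struct(base, "s3", 4);                /* [2] struct s3 { int f1; }; */
          btf__add_field(base, "f1", 1, 0, 0);

          split = btf__new_empty_split(base);
          btf__add_fwd(split, "s3", BTF_FWD_STRUCT);     /* [3] */
          btf__add_ptr(split, 3);                        /* [4] */

          /* FWD [3] is expected to resolve against the base BTF,
           * leaving [4] as a PTR to struct s3 in [2] */
          btf__dedup(split, NULL);
          btf__free(split);
          btf__free(base);
  }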
Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20221109142611.879983-4-eddyz87@gmail.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 176 +++++++++++++++++++++ .../selftests/bpf/prog_tests/btf_dedup_split.c | 45 ++++-- 2 files changed, 206 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index c3e1cea9abae..95a2b80f0d17 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -7690,6 +7690,182 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0e1\0e1_val\0td"), }, }, +{ + .descr = "dedup: standalone fwd declaration struct", + /* + * Verify that CU1:foo and CU2:foo would be unified and that + * typedef/ptr would be updated to point to CU1:foo. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * struct foo; + * typedef struct foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_PTR_ENC(1), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration union", + /* + * Verify that CU1:foo and CU2:foo would be unified and that + * typedef/ptr would be updated to point to CU1:foo. + * Same as "dedup: standalone fwd declaration struct" but for unions. + * + * // CU 1: + * union foo { int x; }; + * + * // CU 2: + * union foo; + * typedef union foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_TBD, 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_PTR_ENC(1), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration wrong kind", + /* + * Negative test for btf_dedup_resolve_fwds: + * - CU1:foo is a struct, C2:foo is a union, thus CU2:foo is not deduped; + * - typedef/ptr should remain unchanged as well. 
+ * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * union foo; + * typedef union foo *foo_ptr; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0foo_ptr"), + }, +}, +{ + .descr = "dedup: standalone fwd declaration name conflict", + /* + * Negative test for btf_dedup_resolve_fwds: + * - two candidates for CU2:foo dedup, thus it is unchanged; + * - typedef/ptr should remain unchanged as well. + * + * // CU 1: + * struct foo { int x; }; + * + * // CU 2: + * struct foo; + * typedef struct foo *foo_ptr; + * + * // CU 3: + * struct foo { int x; int y; }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */ + /* CU 3 */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + /* CU 2 */ + BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */ + /* CU 3 */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"), + }, +}, }; static int btf_type_size(const struct btf_type *t) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 90aac437576d..d9024c7a892a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -141,6 +141,10 @@ static void test_split_fwd_resolve() { btf__add_field(btf1, "f2", 3, 64, 0); /* struct s2 *f2; */ /* } */ btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + /* keep this not a part of type the graph to test btf_dedup_resolve_fwds */ + btf__add_struct(btf1, "s3", 4); /* [6] struct s3 { */ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ /* } */ @@ -153,20 +157,24 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=2 bits_offset=0\n" "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[6] STRUCT 's3' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0"); btf2 = btf__new_empty_split(btf1); if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) goto cleanup; - btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ - btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ 
- btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ - btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */ - btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ - btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ - btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [7] int */ + btf__add_ptr(btf2, 11); /* [8] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [9] fwd for struct s2 */ + btf__add_ptr(btf2, 9); /* [10] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [11] struct s1 { */ + btf__add_field(btf2, "f1", 8, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 10, 64, 0); /* struct s2 *f2; */ /* } */ + btf__add_fwd(btf2, "s3", BTF_FWD_STRUCT); /* [12] fwd for struct s3 */ + btf__add_ptr(btf2, 12); /* [13] ptr to struct s1 */ VALIDATE_RAW_BTF( btf2, @@ -178,13 +186,17 @@ static void test_split_fwd_resolve() { "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" "\t'f1' type_id=1 bits_offset=0", - "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[7] PTR '(anon)' type_id=10", - "[8] FWD 's2' fwd_kind=struct", - "[9] PTR '(anon)' type_id=8", - "[10] STRUCT 's1' size=16 vlen=2\n" - "\t'f1' type_id=7 bits_offset=0\n" - "\t'f2' type_id=9 bits_offset=64"); + "[6] STRUCT 's3' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[7] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[8] PTR '(anon)' type_id=11", + "[9] FWD 's2' fwd_kind=struct", + "[10] PTR '(anon)' type_id=9", + "[11] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=8 bits_offset=0\n" + "\t'f2' type_id=10 bits_offset=64", + "[12] FWD 's3' fwd_kind=struct", + "[13] PTR '(anon)' type_id=12"); err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) @@ -199,7 +211,10 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=2 bits_offset=0\n" "\t'f2' type_id=3 bits_offset=64", "[5] STRUCT 's2' size=4 vlen=1\n" - "\t'f1' type_id=1 bits_offset=0"); + "\t'f1' type_id=1 bits_offset=0", + "[6] STRUCT 's3' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[7] PTR '(anon)' type_id=6"); cleanup: btf__free(btf2); -- cgit v1.2.3-58-ga151 From fd74b79df0d1c9506583d0b52e3d7562a38ac34a Mon Sep 17 00:00:00 2001 From: Domenico Cerasuolo Date: Wed, 9 Nov 2022 10:40:39 -0800 Subject: selftests: Fix test group SKIPPED result When showing the result of a test group, if one of the subtests was skipped, while still having passing subtests, the group result was marked as SKIP. E.g.: 223/1 usdt/basic:SKIP 223/2 usdt/multispec:OK 223/3 usdt/urand_auto_attach:OK 223/4 usdt/urand_pid_attach:OK 223 usdt:SKIP The test result of usdt in the example above should be OK instead of SKIP, because the test group did have passing tests and it would be considered in "normal" state. With this change, only if all of the subtests were skipped, the group test is marked as SKIP. When only some of the subtests are skipped, a more detailed result is given, stating how many of the subtests were skipped. 
E.g.: 223/1 usdt/basic:SKIP 223/2 usdt/multispec:OK 223/3 usdt/urand_auto_attach:OK 223/4 usdt/urand_pid_attach:OK 223 usdt:OK (SKIP: 1/4) Signed-off-by: Domenico Cerasuolo Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221109184039.3514033-1-cerasuolodomenico@gmail.com --- tools/testing/selftests/bpf/test_progs.c | 38 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0e9a47f97890..c34f37d7a523 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -222,6 +222,26 @@ static char *test_result(bool failed, bool skipped) return failed ? "FAIL" : (skipped ? "SKIP" : "OK"); } +#define TEST_NUM_WIDTH 7 + +static void print_test_result(const struct prog_test_def *test, const struct test_state *test_state) +{ + int skipped_cnt = test_state->skip_cnt; + int subtests_cnt = test_state->subtest_num; + + fprintf(env.stdout, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name); + if (test_state->error_cnt) + fprintf(env.stdout, "FAIL"); + else if (!skipped_cnt) + fprintf(env.stdout, "OK"); + else if (skipped_cnt == subtests_cnt || !subtests_cnt) + fprintf(env.stdout, "SKIP"); + else + fprintf(env.stdout, "OK (SKIP: %d/%d)", skipped_cnt, subtests_cnt); + + fprintf(env.stdout, "\n"); +} + static void print_test_log(char *log_buf, size_t log_cnt) { log_buf[log_cnt] = '\0'; @@ -230,18 +250,6 @@ static void print_test_log(char *log_buf, size_t log_cnt) fprintf(env.stdout, "\n"); } -#define TEST_NUM_WIDTH 7 - -static void print_test_name(int test_num, const char *test_name, char *result) -{ - fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name); - - if (result) - fprintf(env.stdout, ":%s", result); - - fprintf(env.stdout, "\n"); -} - static void print_subtest_name(int test_num, int subtest_num, const char *test_name, char *subtest_name, char *result) @@ -307,8 +315,7 @@ static void dump_test_log(const struct prog_test_def *test, subtest_state->skipped)); } - print_test_name(test->test_num, test->test_name, - test_result(test_failed, test_state->skip_cnt)); + print_test_result(test, test_state); } static void stdio_restore(void); @@ -1070,8 +1077,7 @@ static void run_one_test(int test_num) state->tested = true; if (verbose() && env.worker_id == -1) - print_test_name(test_num + 1, test->test_name, - test_result(state->error_cnt, state->skip_cnt)); + print_test_result(test, state); reset_affinity(); restore_netns(); -- cgit v1.2.3-58-ga151 From 42597aa372f5d2f26f209c5db36b1cd098b27147 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 11 Nov 2022 00:32:40 +0200 Subject: libbpf: Hashmap.h update to fix build issues using LLVM14 This fixes an LLVM compilation error encountered while building bpftool. It replaces the expression: _Static_assert((p) == NULL || ...) with the expression: _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || ...) When "p" is not a constant, LLVM 14 does not consider the former to be a constant expression. The error was introduced in the following patch-set: [1]. The error was reported here: [2].
[1] https://lore.kernel.org/bpf/20221109142611.879983-1-eddyz87@gmail.com/ [2] https://lore.kernel.org/all/202211110355.BcGcbZxP-lkp@intel.com/ Reported-by: kernel test robot Fixes: c302378bc157 ("libbpf: Hashmap interface update to allow both long and void* keys/values") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221110223240.1350810-1-eddyz87@gmail.com --- tools/lib/bpf/hashmap.h | 3 ++- tools/perf/util/hashmap.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index 3fe647477bad..0a5bf1937a7c 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -123,7 +123,8 @@ enum hashmap_insert_strategy { }; #define hashmap_cast_ptr(p) ({ \ - _Static_assert((p) == NULL || sizeof(*(p)) == sizeof(long), \ + _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \ + sizeof(*(p)) == sizeof(long), \ #p " pointee should be a long-sized integer or a pointer"); \ (long *)(p); \ }) diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index 3fe647477bad..0a5bf1937a7c 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -123,7 +123,8 @@ enum hashmap_insert_strategy { }; #define hashmap_cast_ptr(p) ({ \ - _Static_assert((p) == NULL || sizeof(*(p)) == sizeof(long), \ + _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \ + sizeof(*(p)) == sizeof(long), \ #p " pointee should be a long-sized integer or a pointer"); \ (long *)(p); \ }) -- cgit v1.2.3-58-ga151 From e4c9cf0ce8c413c2030e8fb215551d7e0582ee7b Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Fri, 11 Nov 2022 11:08:36 +0800 Subject: selftests/bpf: Fix xdp_synproxy compilation failure in 32-bit arch xdp_synproxy fails to compile on 32-bit arches; the log is as follows: xdp_synproxy.c: In function 'parse_options': xdp_synproxy.c:175:36: error: left shift count >= width of type [-Werror=shift-count-overflow] 175 | *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4; | ^~ xdp_synproxy.c: In function 'syncookie_open_bpf_maps': xdp_synproxy.c:289:28: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] 289 | .map_ids = (__u64)map_ids, | ^ Fix it.
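The two underlying C pitfalls can be shown in isolation (illustrative snippet, not taken from the patch):

  /* On an ILP32 target, unsigned long is only 32 bits wide. */
  static unsigned long long pack_opts(unsigned long long mss6, unsigned long ttl,
                                      unsigned long wscale, unsigned long mss4)
  {
          /* mss6 must be a 64-bit type; shifting a 32-bit value by 32
           * is undefined and trips -Werror=shift-count-overflow */
          return (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4;
  }

  static unsigned long long ptr_to_u64(const void *p)
  {
          /* a direct pointer-to-__u64 cast warns on 32-bit; widening
           * via unsigned long keeps the sizes equal at each step */
          return (unsigned long long)(unsigned long)p;
  }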
Fixes: fb5cd0ce70d4 ("selftests/bpf: Add selftests for raw syncookie helpers") Signed-off-by: Yang Jihong Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221111030836.37632-1-yangjihong1@huawei.com --- tools/testing/selftests/bpf/xdp_synproxy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c index ff35320d2be9..410a1385a01d 100644 --- a/tools/testing/selftests/bpf/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -104,7 +104,8 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * { "tc", no_argument, NULL, 'c' }, { NULL, 0, NULL, 0 }, }; - unsigned long mss4, mss6, wscale, ttl; + unsigned long mss4, wscale, ttl; + unsigned long long mss6; unsigned int tcpipopts_mask = 0; if (argc < 2) @@ -286,7 +287,7 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports prog_info = (struct bpf_prog_info) { .nr_map_ids = 8, - .map_ids = (__u64)map_ids, + .map_ids = (__u64)(unsigned long)map_ids, }; info_len = sizeof(prog_info); -- cgit v1.2.3-58-ga151 From 9bb053490f1a5a0914eb9f7b4116a0e4a95d4f8e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 7 Nov 2022 15:04:18 -0800 Subject: bpf: Add hwtstamp field for the sockops prog The bpf-tc prog has already been able to access the skb_hwtstamps(skb)->hwtstamp. This patch extends the same hwtstamp access to the sockops prog. In sockops, the skb is also available to the bpf prog during the BPF_SOCK_OPS_PARSE_HDR_OPT_CB event. One use case: the hwtstamp lets the sockops prog better measure the one-way delay when the sender has put the tx timestamp in the tcp header option. Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221107230420.4192307-2-martin.lau@linux.dev --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 39 +++++++++++++++++++++++++++++++-------- tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 33 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 94659f6b3395..fb4c911d2a03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6445,6 +6445,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. 
*/ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ diff --git a/net/core/filter.c b/net/core/filter.c index cb3b635e35be..cd667cdbdb26 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8925,6 +8925,10 @@ static bool sock_ops_is_valid_access(int off, int size, bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case offsetof(struct bpf_sock_ops, skb_hwtstamp): + if (size != sizeof(__u64)) + return false; + break; default: if (size != size_default) return false; @@ -9108,21 +9112,21 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, return insn; } -static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, +static struct bpf_insn *bpf_convert_shinfo_access(__u8 dst_reg, __u8 skb_reg, struct bpf_insn *insn) { /* si->dst_reg = skb_shinfo(SKB); */ #ifdef NET_SKBUFF_DATA_USES_OFFSET *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - BPF_REG_AX, si->src_reg, + BPF_REG_AX, skb_reg, offsetof(struct sk_buff, end)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), - si->dst_reg, si->src_reg, + dst_reg, skb_reg, offsetof(struct sk_buff, head)); - *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); + *insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, BPF_REG_AX); #else *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - si->dst_reg, si->src_reg, + dst_reg, skb_reg, offsetof(struct sk_buff, end)); #endif @@ -9515,7 +9519,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct __sk_buff, gso_segs): - insn = bpf_convert_shinfo_access(si, insn); + insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, @@ -9523,7 +9527,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, target_size)); break; case offsetof(struct __sk_buff, gso_size): - insn = bpf_convert_shinfo_access(si, insn); + insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, @@ -9550,7 +9554,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8); BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0); - insn = bpf_convert_shinfo_access(si, insn); + insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn); *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, @@ -10400,6 +10404,25 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, tcp_flags), si->dst_reg, si->dst_reg, off); break; + case offsetof(struct bpf_sock_ops, skb_hwtstamp): { + struct bpf_insn *jmp_on_null_skb; + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, + skb), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + skb)); + /* Reserve one insn to test skb == NULL */ + jmp_on_null_skb = insn++; + insn = bpf_convert_shinfo_access(si->dst_reg, si->dst_reg, insn); + *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + hwtstamps, 8, + target_size)); + *jmp_on_null_skb = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, + insn - jmp_on_null_skb - 1); + break; + } } return insn - insn_buf; } diff --git a/tools/include/uapi/linux/bpf.h 
b/tools/include/uapi/linux/bpf.h index 94659f6b3395..fb4c911d2a03 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6445,6 +6445,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. */ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ -- cgit v1.2.3-58-ga151 From 52929912d7bda040b43538e8d88e8d231b76eb4e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 7 Nov 2022 15:04:19 -0800 Subject: selftests/bpf: Fix incorrect ASSERT in the tcp_hdr_options test This patch fixes the incorrect ASSERT test in tcp_hdr_options during the CHECK to ASSERT macro cleanup. Fixes: 3082f8cd4ba3 ("selftests/bpf: Convert tcp_hdr_options test to ASSERT_* macros") Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Wang Yufen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221107230420.4192307-3-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 617bbce6ef8f..57191773572a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -485,7 +485,7 @@ static void misc(void) goto check_linum; ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); - if (ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) + if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)")) goto check_linum; } @@ -539,7 +539,7 @@ void test_tcp_hdr_options(void) goto skel_destroy; cg_fd = test__join_cgroup(CG_NAME); - if (ASSERT_GE(cg_fd, 0, "join_cgroup")) + if (!ASSERT_GE(cg_fd, 0, "join_cgroup")) goto skel_destroy; for (i = 0; i < ARRAY_SIZE(tests); i++) { -- cgit v1.2.3-58-ga151 From 8cac7a59b252732d8427ebc29c3fc6c36ff653e1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 7 Nov 2022 15:04:20 -0800 Subject: selftests/bpf: Test skops->skb_hwtstamp This patch tests reading the skops->skb_hwtstamp field. A local test was also done in which the shinfo hwtstamp was temporarily set to a non-zero value in the kernel's bpf_skops_parse_hdr() and the same value could be read back by the skops test. An adjustment is needed to the btf_dump selftest because of the changes in the 'struct bpf_sock_ops'.
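A minimal sockops sketch of reading the new field (hypothetical program, condensed from the selftest's logic):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  unsigned int nr_hwtstamp;

  SEC("sockops")
  int observe_hwtstamp(struct bpf_sock_ops *skops)
  {
          /* skb_hwtstamp reads as 0 unless an skb is attached to the
           * event, e.g. during BPF_SOCK_OPS_PARSE_HDR_OPT_CB */
          if (skops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB &&
              skops->skb_hwtstamp)
                  nr_hwtstamp++;
          return 1;
  }

  char _license[] SEC("license") = "GPL";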
Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221107230420.4192307-4-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 2 ++ tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 24da335482d4..0ba2e8b9c6ac 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -791,11 +791,11 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, sizeof(struct bpf_sock_ops) - 1, "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", - { .op = 1, .skb_tcp_flags = 2}); + { .op = 1, .skb_hwtstamp = 2}); TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, sizeof(struct bpf_sock_ops) - 1, "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", - { .op = 1, .skb_tcp_flags = 0}); + { .op = 1, .skb_hwtstamp = 0}); } static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 57191773572a..5cf85d0f9827 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -505,6 +505,8 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin"); + ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 2c121c5d66a7..d487153a839d 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -27,6 +27,7 @@ unsigned int nr_pure_ack = 0; unsigned int nr_data = 0; unsigned int nr_syn = 0; unsigned int nr_fin = 0; +unsigned int nr_hwtstamp = 0; /* Check the header received from the active side */ static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) @@ -146,6 +147,9 @@ static int check_active_hdr_in(struct bpf_sock_ops *skops) if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) nr_pure_ack++; + if (skops->skb_hwtstamp) + nr_hwtstamp++; + return CG_OK; } -- cgit v1.2.3-58-ga151 From eb6af4ceda2d885416d8382f096030d39896aafc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Nov 2022 10:12:42 -0800 Subject: selftests/bpf: fix veristat's singular file-or-prog filter Fix the bug of filtering out filename too early, before we know the program name, if using unified file-or-prog filter (i.e., -f ). Because we try to filter BPF object file early without opening and parsing it, if any_glob (file-or-prog) filter is used we have to accept any filename just to get program name, which might match any_glob. 
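To illustrate (the glob is hypothetical), a unified filter such as:

  ./veristat -f 'pyperf*' *.bpf.o

may match either an object file name or a program name, so a file can only be ruled out after it has been opened and its program names are known.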
Fixes: 10b1b3f3e56a ("selftests/bpf: consolidate and improve file/prog filtering in veristat") Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221111181242.2101192-1-andrii@kernel.org Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/veristat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 9e3811ab4866..f961b49b8ef4 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -367,7 +367,13 @@ static bool should_process_file_prog(const char *filename, const char *prog_name if (f->any_glob) { if (glob_matches(filename, f->any_glob)) return true; - if (prog_name && glob_matches(prog_name, f->any_glob)) + /* If we don't know program name yet, any_glob filter + * has to assume that current BPF object file might be + * relevant; we'll check again later on after opening + * BPF object file, at which point program name will + * be known finally. + */ + if (!prog_name || glob_matches(prog_name, f->any_glob)) return true; } else { if (f->file_glob && !glob_matches(filename, f->file_glob)) -- cgit v1.2.3-58-ga151 From 31b4e63eb24a81770b0e46a768f606a114787262 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 10 Nov 2022 15:23:21 -0800 Subject: selftests: mptcp: use max_time instead of time 'time' is the local variable of run_test() function, while 'max_time' is the local variable of do_transfer() function. So in do_transfer(), $max_time should be used, not $time. Please note that here $time == $max_time so the behaviour is not changed but the right variable is used. Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ffa13a957a36..af70c14e0bf9 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -173,7 +173,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ 0.0.0.0 < "$sin" > "$sout" & local spid=$! @@ -181,7 +181,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $time \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ 10.0.3.3 < "$cin" > "$cout" & local cpid=$! -- cgit v1.2.3-58-ga151 From dfd0afbf151d85411b371e841f62b81ee5d1ca54 Mon Sep 17 00:00:00 2001 From: David Michael Date: Sun, 13 Nov 2022 15:52:17 -0500 Subject: libbpf: Fix uninitialized warning in btf_dump_dump_type_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 11.3.0 fails to compile btf_dump.c due to the following error, which seems to originate in btf_dump_struct_data where the returned value would be uninitialized if btf_vlen returns zero. 
btf_dump.c: In function ‘btf_dump_dump_type_data’: btf_dump.c:2363:12: error: ‘err’ may be used uninitialized in this function [-Werror=maybe-uninitialized] 2363 | if (err < 0) | ^ Fixes: 920d16af9b42 ("libbpf: BTF dumper support for typed data") Signed-off-by: David Michael Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Acked-by: Alan Maguire Link: https://lore.kernel.org/bpf/87zgcu60hq.fsf@gmail.com --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 12f7039e0ab2..e9f849d82124 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1989,7 +1989,7 @@ static int btf_dump_struct_data(struct btf_dump *d, { const struct btf_member *m = btf_members(t); __u16 n = btf_vlen(t); - int i, err; + int i, err = 0; /* note that we increment depth before calling btf_dump_print() below; * this is intentional. btf_dump_data_newline() will not print a -- cgit v1.2.3-58-ga151 From c7694ac340b0394afba079fc3d1072f4c99bcfc9 Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Mon, 14 Nov 2022 04:06:46 +0900 Subject: libbpf: checkpatch: Fixed code alignments in btf.c Fixed some checkpatch issues in btf.c Signed-off-by: Kang Minchul Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221113190648.38556-2-tegongkang@gmail.com --- tools/lib/bpf/btf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3bd812bf88ff..71e165b09ed5 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1724,7 +1724,8 @@ err_out: memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); /* and now restore original strings section size; types data size - * wasn't modified, so doesn't need restoring, see big comment above */ + * wasn't modified, so doesn't need restoring, see big comment above + */ btf->hdr->str_len = old_strs_len; hashmap__free(p.str_off_map); @@ -2329,7 +2330,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) */ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) { - if (!value|| !value[0]) + if (!value || !value[0]) return libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); -- cgit v1.2.3-58-ga151 From e3ba8e4e8c19a9b4d1866bc72d81afb10c043560 Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Mon, 14 Nov 2022 04:06:47 +0900 Subject: libbpf: Fixed various checkpatch issues in libbpf.c Fixed following checkpatch issues: WARNING: Block comments use a trailing */ on a separate line + * other BPF program's BTF object */ WARNING: Possible repeated word: 'be' + * name. 
This is important to be be able to find corresponding BTF ERROR: switch and case should be at the same indent + switch (ext->kcfg.sz) { + case 1: *(__u8 *)ext_val = value; break; + case 2: *(__u16 *)ext_val = value; break; + case 4: *(__u32 *)ext_val = value; break; + case 8: *(__u64 *)ext_val = value; break; + default: ERROR: trailing statements should be on next line + case 1: *(__u8 *)ext_val = value; break; ERROR: trailing statements should be on next line + case 2: *(__u16 *)ext_val = value; break; ERROR: trailing statements should be on next line + case 4: *(__u32 *)ext_val = value; break; ERROR: trailing statements should be on next line + case 8: *(__u64 *)ext_val = value; break; ERROR: code indent should use tabs where possible + }$ WARNING: please, no spaces at the start of a line + }$ WARNING: Block comments use a trailing */ on a separate line + * for faster search */ ERROR: code indent should use tabs where possible +^I^I^I^I^I^I &ext->kcfg.is_signed);$ WARNING: braces {} are not necessary for single statement blocks + if (err) { + return err; + } ERROR: code indent should use tabs where possible +^I^I^I^I sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);$ Signed-off-by: Kang Minchul Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221113190648.38556-3-tegongkang@gmail.com --- tools/lib/bpf/libbpf.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1d263885d635..b5df6aca06ea 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -347,7 +347,8 @@ enum sec_def_flags { SEC_ATTACHABLE = 2, SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, /* attachment target is specified through BTF ID in either kernel or - * other BPF program's BTF object */ + * other BPF program's BTF object + */ SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, @@ -488,7 +489,7 @@ struct bpf_map { char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section - * name. This is important to be be able to find corresponding BTF + * name. This is important to be able to find corresponding BTF * DATASEC information. 
*/ char *real_name; @@ -1863,12 +1864,20 @@ static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, return -ERANGE; } switch (ext->kcfg.sz) { - case 1: *(__u8 *)ext_val = value; break; - case 2: *(__u16 *)ext_val = value; break; - case 4: *(__u32 *)ext_val = value; break; - case 8: *(__u64 *)ext_val = value; break; - default: - return -EINVAL; + case 1: + *(__u8 *)ext_val = value; + break; + case 2: + *(__u16 *)ext_val = value; + break; + case 4: + *(__u32 *)ext_val = value; + break; + case 8: + *(__u64 *)ext_val = value; + break; + default: + return -EINVAL; } ext->is_set = true; return 0; @@ -2770,7 +2779,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) m->type = enum64_placeholder_id; m->offset = 0; } - } + } } return 0; @@ -3518,7 +3527,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } /* sort BPF programs by section name and in-section instruction offset - * for faster search */ + * for faster search + */ if (obj->nr_programs) qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); @@ -3817,7 +3827,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -EINVAL; } ext->kcfg.type = find_kcfg_type(obj->btf, t->type, - &ext->kcfg.is_signed); + &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; @@ -4965,9 +4975,9 @@ bpf_object__reuse_map(struct bpf_map *map) err = bpf_map__reuse_fd(map, pin_fd); close(pin_fd); - if (err) { + if (err) return err; - } + map->pinned = true; pr_debug("reused pinned map at '%s'\n", map->pin_path); @@ -5485,7 +5495,7 @@ static int load_module_btfs(struct bpf_object *obj) } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, - sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); + sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) goto err_out; @@ -6237,7 +6247,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * prog; each main prog can have a different set of * subprograms appended (potentially in different order as * well), so position of any subprog can be different for - * different main programs */ + * different main programs + */ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", @@ -10995,7 +11006,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, - usdt_provider, usdt_name, usdt_cookie); + usdt_provider, usdt_name, usdt_cookie); err = libbpf_get_error(link); if (err) return libbpf_err_ptr(err); @@ -12304,7 +12315,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) btf = bpf_object__btf(s->obj); if (!btf) { pr_warn("subskeletons require BTF at runtime (object %s)\n", - bpf_object__name(s->obj)); + bpf_object__name(s->obj)); return libbpf_err(-errno); } -- cgit v1.2.3-58-ga151 From b486d19a0ab097eecf3ee679369b216d2cb6c34e Mon Sep 17 00:00:00 2001 From: Kang Minchul Date: Mon, 14 Nov 2022 04:06:48 +0900 Subject: libbpf: checkpatch: Fixed code alignments in ringbuf.c Fixed some checkpatch issues in ringbuf.c Signed-off-by: Kang Minchul Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221113190648.38556-4-tegongkang@gmail.com --- tools/lib/bpf/ringbuf.c | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..51808c5f0014 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -128,7 +128,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, /* Map read-only producer page and data pages. We map twice as big * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. - * */ + */ tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { @@ -220,7 +220,7 @@ static inline int roundup_len(__u32 len) return (len + 7) / 8 * 8; } -static int64_t ringbuf_process_ring(struct ring* r) +static int64_t ringbuf_process_ring(struct ring *r) { int *len_ptr, len, err; /* 64-bit to avoid overflow in case of extreme application behavior */ -- cgit v1.2.3-58-ga151 From f0c5941ff5b255413d31425bb327c2aec3625673 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:25 +0530 Subject: bpf: Support bpf_list_head in map values Add the support on the map side to parse, recognize, verify, and build metadata table for a new special field of the type struct bpf_list_head. To parameterize the bpf_list_head for a certain value type and the list_node member it will accept in that value type, we use BTF declaration tags. The definition of bpf_list_head in a map value will be done as follows: struct foo { struct bpf_list_node node; int data; }; struct map_value { struct bpf_list_head head __contains(foo, node); }; Then, the bpf_list_head only allows adding to the list 'head' using the bpf_list_node 'node' for the type struct foo. The 'contains' annotation is a BTF declaration tag composed of four parts, "contains:name:node" where the name is then used to look up the type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during the lookup. The node defines name of the member in this type that has the type struct bpf_list_node, which is actually used for linking into the linked list. For now, 'kind' part is hardcoded as struct. This allows building intrusive linked lists in BPF, using container_of to obtain pointer to entry, while being completely type safe from the perspective of the verifier. The verifier knows exactly the type of the nodes, and knows that list helpers return that type at some fixed offset where the bpf_list_node member used for this list exists. The verifier also uses this information to disallow adding types that are not accepted by a certain list. For now, no elements can be added to such lists. Support for that is coming in future patches, hence draining and freeing items is done with a TODO that will be resolved in a future patch. Note that the bpf_list_head_free function moves the list out to a local variable under the lock and releases it, doing the actual draining of the list items outside the lock. While this helps with not holding the lock for too long pessimizing other concurrent list operations, it is also necessary for deadlock prevention: unless every function called in the critical section would be notrace, a fentry/fexit program could attach and call bpf_map_update_elem again on the map, leading to the same lock being acquired if the key matches and lead to a deadlock. While this requires some special effort on part of the BPF programmer to trigger and is highly unlikely to occur in practice, it is always better if we can avoid such a condition. 
While notrace would prevent this, doing the draining outside the lock has advantages of its own, hence it is used to also fix the deadlock related problem. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 17 +++++ include/uapi/linux/bpf.h | 10 +++ kernel/bpf/btf.c | 145 ++++++++++++++++++++++++++++++++++++++++- kernel/bpf/helpers.c | 32 +++++++++ kernel/bpf/syscall.c | 22 ++++++- kernel/bpf/verifier.c | 7 ++ tools/include/uapi/linux/bpf.h | 10 +++ 7 files changed, 239 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f08eb2d27de0..05f98e9e5c48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -175,6 +175,7 @@ enum btf_field_type { BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, + BPF_LIST_HEAD = (1 << 4), }; struct btf_field_kptr { @@ -184,11 +185,18 @@ struct btf_field_kptr { u32 btf_id; }; +struct btf_field_list_head { + struct btf *btf; + u32 value_btf_id; + u32 node_offset; +}; + struct btf_field { u32 offset; enum btf_field_type type; union { struct btf_field_kptr kptr; + struct btf_field_list_head list_head; }; }; @@ -266,6 +274,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_LIST_HEAD: + return "bpf_list_head"; default: WARN_ON_ONCE(1); return "unknown"; @@ -282,6 +292,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return sizeof(u64); + case BPF_LIST_HEAD: + return sizeof(struct bpf_list_head); default: WARN_ON_ONCE(1); return 0; @@ -298,6 +310,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return __alignof__(u64); + case BPF_LIST_HEAD: + return __alignof__(struct bpf_list_head); default: WARN_ON_ONCE(1); return 0; @@ -403,6 +417,9 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_list_head_free(const struct btf_field *field, void *list_head, + struct bpf_spin_lock *spin_lock); + int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fb4c911d2a03..6580448e9f77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6888,6 +6888,16 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 12361d7b2498..c0d73d71c539 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3205,9 +3205,15 @@ enum { struct btf_field_info { enum btf_field_type type; u32 off; - struct { - u32 type_id; - } kptr; + union { + struct { + u32 type_id; + } kptr; + struct { + const char *node_name; + u32 value_btf_id; + } list_head; + }; }; static int btf_find_struct(const struct btf *btf, const struct btf_type *t, @@ -3261,6 +3267,63 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, return BTF_FIELD_FOUND; } +static const char *btf_find_decl_tag_value(const struct btf *btf, + const struct btf_type *pt, + int comp_idx, const char *tag_key) +{ + int i; + + for (i = 1; i < btf_nr_types(btf); i++) { + const struct btf_type *t = btf_type_by_id(btf, i); + int len = strlen(tag_key); + + if (!btf_type_is_decl_tag(t)) + continue; + if (pt != btf_type_by_id(btf, t->type) || + btf_type_decl_tag(t)->component_idx != comp_idx) + continue; + if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) + continue; + return __btf_name_by_offset(btf, t->name_off) + len; + } + return NULL; +} + +static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt, + const struct btf_type *t, int comp_idx, + u32 off, int sz, struct btf_field_info *info) +{ + const char *value_type; + const char *list_node; + s32 id; + + if (!__btf_type_is_struct(t)) + return BTF_FIELD_IGNORE; + if (t->size != sz) + return BTF_FIELD_IGNORE; + value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); + if (!value_type) + return -EINVAL; + list_node = strstr(value_type, ":"); + if (!list_node) + return -EINVAL; + value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN); + if (!value_type) + return -ENOMEM; + id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); + kfree(value_type); + if (id < 0) + return id; + list_node++; + if (str_is_empty(list_node)) + return -EINVAL; + info->type = BPF_LIST_HEAD; + info->off = off; + info->list_head.value_btf_id = id; + info->list_head.node_name = list_node; + return BTF_FIELD_FOUND; +} + static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, int *align, int *sz) { @@ -3284,6 +3347,12 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, goto end; } } + if (field_mask & BPF_LIST_HEAD) { + if (!strcmp(name, "bpf_list_head")) { + type = BPF_LIST_HEAD; + goto end; + } + } /* Only return BPF_KPTR when all other types with matchable names fail */ if (field_mask & BPF_KPTR) { type = BPF_KPTR_REF; @@ -3339,6 +3408,12 @@ static int btf_find_struct_field(const struct btf *btf, if (ret < 0) return ret; break; + case BPF_LIST_HEAD: + ret = btf_find_list_head(btf, t, member_type, i, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; default: return -EFAULT; } @@ -3393,6 +3468,12 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, if (ret < 0) return ret; break; + case BPF_LIST_HEAD: + ret = btf_find_list_head(btf, var, var_type, -1, off, sz, + idx < info_cnt ? 
&info[idx] : &tmp); + if (ret < 0) + return ret; + break; default: return -EFAULT; } @@ -3491,11 +3572,52 @@ end_btf: return ret; } +static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) +{ + const struct btf_type *t, *n = NULL; + const struct btf_member *member; + u32 offset; + int i; + + t = btf_type_by_id(btf, info->list_head.value_btf_id); + /* We've already checked that value_btf_id is a struct type. We + * just need to figure out the offset of the list_node, and + * verify its type. + */ + for_each_member(i, t, member) { + if (strcmp(info->list_head.node_name, __btf_name_by_offset(btf, member->name_off))) + continue; + /* Invalid BTF, two members with same name */ + if (n) + return -EINVAL; + n = btf_type_by_id(btf, member->type); + if (!__btf_type_is_struct(n)) + return -EINVAL; + if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off))) + return -EINVAL; + offset = __btf_member_bit_offset(n, member); + if (offset % 8) + return -EINVAL; + offset /= 8; + if (offset % __alignof__(struct bpf_list_node)) + return -EINVAL; + + field->list_head.btf = (struct btf *)btf; + field->list_head.value_btf_id = info->list_head.value_btf_id; + field->list_head.node_offset = offset; + } + if (!n) + return -ENOENT; + return 0; +} + struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size) { struct btf_field_info info_arr[BTF_FIELDS_MAX]; struct btf_record *rec; + u32 next_off = 0; int ret, i, cnt; ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); @@ -3517,6 +3639,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type ret = -EFAULT; goto end; } + if (info_arr[i].off < next_off) { + ret = -EEXIST; + goto end; + } + next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type); rec->field_mask |= info_arr[i].type; rec->fields[i].offset = info_arr[i].off; @@ -3539,12 +3666,24 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type if (ret < 0) goto end; break; + case BPF_LIST_HEAD: + ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; default: ret = -EFAULT; goto end; } rec->cnt++; } + + /* bpf_list_head requires bpf_spin_lock */ + if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) { + ret = -EINVAL; + goto end; + } + return rec; end: btf_record_free(rec); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 283f55bbeb70..7bc71995f17c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1706,6 +1706,38 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } +void bpf_list_head_free(const struct btf_field *field, void *list_head, + struct bpf_spin_lock *spin_lock) +{ + struct list_head *head = list_head, *orig_head = list_head; + + BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head)); + BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head)); + + /* Do the actual list draining outside the lock to not hold the lock for + * too long, and also prevent deadlocks if tracing programs end up + * executing on entry/exit of functions called inside the critical + * section, and end up doing map ops that call bpf_list_head_free for + * the same map value again. 
+ */ + __bpf_spin_lock_irqsave(spin_lock); + if (!head->next || list_empty(head)) + goto unlock; + head = head->next; +unlock: + INIT_LIST_HEAD(orig_head); + __bpf_spin_unlock_irqrestore(spin_lock); + + while (head != orig_head) { + void *obj = head; + + obj -= field->list_head.node_offset; + head = head->next; + /* TODO: Rework later */ + kfree(obj); + } +} + BTF_SET8_START(tracing_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85532d301124..fdbae52f463f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -536,6 +536,9 @@ void btf_record_free(struct btf_record *rec) module_put(rec->fields[i].kptr.module); btf_put(rec->fields[i].kptr.btf); break; + case BPF_LIST_HEAD: + /* Nothing to release for bpf_list_head */ + break; default: WARN_ON_ONCE(1); continue; @@ -578,6 +581,9 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) goto free; } break; + case BPF_LIST_HEAD: + /* Nothing to acquire for bpf_list_head */ + break; default: ret = -EFAULT; WARN_ON_ONCE(1); @@ -637,6 +643,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) case BPF_KPTR_REF: field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0)); break; + case BPF_LIST_HEAD: + if (WARN_ON_ONCE(rec->spin_lock_off < 0)) + continue; + bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); + break; default: WARN_ON_ONCE(1); continue; @@ -965,7 +976,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (!value_type || value_size != map->value_size) return -EINVAL; - map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR, + map->record = btf_parse_fields(btf, value_type, + BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; @@ -1012,6 +1024,14 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, goto free_map_tab; } break; + case BPF_LIST_HEAD: + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) { + ret = -EOPNOTSUPP; + goto free_map_tab; + } + break; default: /* Fail if map_type checks are missing for a field type */ ret = -EOPNOTSUPP; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 07c0259dfc1a..a50018e2d4a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12814,6 +12814,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, { enum bpf_prog_type prog_type = resolve_prog_type(prog); + if (btf_record_has_field(map->record, BPF_LIST_HEAD)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_list_head yet\n"); + return -EINVAL; + } + } + if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) { if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fb4c911d2a03..6580448e9f77 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6888,6 +6888,16 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
-- cgit v1.2.3-58-ga151 From 894f2a8b1673a355a1a7507a4dfa6a3c836d07c1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:27 +0530 Subject: bpf: Rename MEM_ALLOC to MEM_RINGBUF Currently, verifier uses MEM_ALLOC type tag to specially tag memory returned from bpf_ringbuf_reserve helper. However, this is currently only used for this purpose and there is an implicit assumption that it only refers to ringbuf memory (e.g. the check for ARG_PTR_TO_ALLOC_MEM in check_func_arg_reg_off). Hence, rename MEM_ALLOC to MEM_RINGBUF to indicate this special relationship and instead open the use of MEM_ALLOC for more generic allocations made for user types. Also, since ARG_PTR_TO_ALLOC_MEM_OR_NULL is unused, simply drop it. Finally, update selftests using 'alloc_' verifier string to 'ringbuf_'. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++------- kernel/bpf/ringbuf.c | 6 +++--- kernel/bpf/verifier.c | 14 +++++++------- tools/testing/selftests/bpf/prog_tests/dynptr.c | 2 +- tools/testing/selftests/bpf/verifier/ringbuf.c | 2 +- tools/testing/selftests/bpf/verifier/spill_fill.c | 2 +- 6 files changed, 17 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2fe3ec620d54..afc1c51b59ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,10 +488,8 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - /* MEM was "allocated" from a different helper, and cannot be mixed - * with regular non-MEM_ALLOC'ed MEM types. - */ - MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + /* MEM points to BPF ring buffer reservation. */ + MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS), /* MEM is in user address space. 
*/ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), @@ -565,7 +563,7 @@ enum bpf_arg_type { ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ - ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ + ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ @@ -582,7 +580,6 @@ enum bpf_arg_type { ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, - ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* pointer to memory does not need to be initialized, helper function must fill @@ -617,7 +614,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_MEM, + RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM, RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 9e832acf4692..80f4b4d88aaf 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -447,7 +447,7 @@ BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags) const struct bpf_func_proto bpf_ringbuf_reserve_proto = { .func = bpf_ringbuf_reserve, - .ret_type = RET_PTR_TO_ALLOC_MEM_OR_NULL, + .ret_type = RET_PTR_TO_RINGBUF_MEM_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, @@ -490,7 +490,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags) const struct bpf_func_proto bpf_ringbuf_submit_proto = { .func = bpf_ringbuf_submit, .ret_type = RET_VOID, - .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE, + .arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; @@ -503,7 +503,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags) const struct bpf_func_proto bpf_ringbuf_discard_proto = { .func = bpf_ringbuf_discard, .ret_type = RET_VOID, - .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE, + .arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c88da7e3ca74..c588e5483540 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -577,8 +577,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env, if (type & MEM_RDONLY) strncpy(prefix, "rdonly_", 32); - if (type & MEM_ALLOC) - strncpy(prefix, "alloc_", 32); + if (type & MEM_RINGBUF) + strncpy(prefix, "ringbuf_", 32); if (type & MEM_USER) strncpy(prefix, "user_", 32); if (type & MEM_PERCPU) @@ -5785,7 +5785,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, - PTR_TO_MEM | MEM_ALLOC, + PTR_TO_MEM | MEM_RINGBUF, PTR_TO_BUF, }, }; @@ -5803,7 +5803,7 @@ static const struct bpf_reg_types 
int_ptr_types = { static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; +static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5836,7 +5836,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, + [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, @@ -5957,14 +5957,14 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MAP_VALUE: case PTR_TO_MEM: case PTR_TO_MEM | MEM_RDONLY: - case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_MEM | MEM_RINGBUF: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ - if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM) + if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM) return 0; break; /* All the rest must be rejected, except PTR_TO_BTF_ID which allows diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 8fc4e6c02bfd..b0c06f821cb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -17,7 +17,7 @@ static struct { {"ringbuf_missing_release2", "Unreleased reference id=2"}, {"ringbuf_missing_release_callback", "Unreleased reference id"}, {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, - {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, + {"ringbuf_invalid_api", "type=mem expected=ringbuf_mem"}, {"add_dynptr_to_map1", "invalid indirect read from stack"}, {"add_dynptr_to_map2", "invalid indirect read from stack"}, {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c index b64d33e4833c..84838feba47f 100644 --- a/tools/testing/selftests/bpf/verifier/ringbuf.c +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -28,7 +28,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "dereference of modified alloc_mem ptr R1", + .errstr = "dereference of modified ringbuf_mem ptr R1", }, { "ringbuf: invalid reservation offset 2", diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index e23f07175e1b..9bb302dade23 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -84,7 +84,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", + .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited", }, { "check corrupted spill/fill", -- cgit v1.2.3-58-ga151 From 
4741c371aa088a951bd65f995d2dfe7cd29b4b9a Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 16 Nov 2022 00:48:59 +0200 Subject: selftests/bpf: check nullness propagation for reg to reg comparisons Verify that nullness information is propagated in the branches of register to register JEQ and JNE operations. Signed-off-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221115224859.2452988-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/jeq_infer_not_null.c | 174 +++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c new file mode 100644 index 000000000000..67a1c07ead34 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c @@ -0,0 +1,174 @@ +{ + /* This is equivalent to the following program: + * + * r6 = skb->sk; + * r7 = sk_fullsock(r6); + * r0 = sk_fullsock(r6); + * if (r0 == 0) return 0; (a) + * if (r0 != r7) return 0; (b) + * *r7->type; (c) + * return 0; + * + * It is safe to dereference r7 at point (c), because of (a) and (b). + * The test verifies that relation r0 == r7 is propagated from (b) to (c). + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 == r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch", + .insns = { + /* r6 = skb->sk */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + /* if (r0 == r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE !
*/ + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + /* r0 = *(r7->type); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "R7 invalid mem access 'sock_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as a first test, but not null should be inferred for JEQ branch */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + /* if (r0 != r7) return 0; */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */ + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + /* r0 = *(r7->type); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 != r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "R7 invalid mem access 'sock_or_null'", + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Maps are treated in a different branch of `mark_ptr_not_null_reg`, + * so separate test for maps case. 
+ */ + "jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE", + .insns = { + /* r9 = &some stack to use as key */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -8), + /* r8 = process local map */ + BPF_LD_MAP_FD(BPF_REG_8, 0), + /* r6 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r7 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), + /* if (r6 != r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1), + /* read *r7; */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_xdp_sock, queue_id)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, -- cgit v1.2.3-58-ga151 From df9c41e9db2d0fc9feba0423c21e4319d8acd033 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 15 Nov 2022 21:00:07 +0800 Subject: bpftool: Check argc first before "file" in do_batch() If the parameters for batch are more than 2, check argc first can return immediately, no need to use is_prefix() to check "file" with a little overhead and then check argc, it is better to check "file" only when the parameters for batch are 2. Signed-off-by: Tiezhu Yang Acked-by: Stanislav Fomichev Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/1668517207-11822-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 741e50ee0b6c..337ab7977ea4 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -337,12 +337,12 @@ static int do_batch(int argc, char **argv) if (argc < 2) { p_err("too few parameters for batch"); return -1; - } else if (!is_prefix(*argv, "file")) { - p_err("expected 'file', got: %s", *argv); - return -1; } else if (argc > 2) { p_err("too many parameters for batch"); return -1; + } else if (!is_prefix(*argv, "file")) { + p_err("expected 'file', got: %s", *argv); + return -1; } NEXT_ARG(); -- cgit v1.2.3-58-ga151 From 5b1d640800de7fe02d68bf592d9d101de24c87f2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 16 Nov 2022 03:54:56 +0200 Subject: selftests/bpf: allow unpriv bpf for selftests by default Enable unprivileged bpf for selftests kernel by default. This forces CI to run test_verifier tests in both privileged and unprivileged modes. The test_verifier.c:do_test uses sysctl kernel.unprivileged_bpf_disabled to decide whether to run or to skip test cases in unprivileged mode. The CONFIG_BPF_UNPRIV_DEFAULT_OFF controls the default value of the kernel.unprivileged_bpf_disabled. 
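For reference, the sysctl that drives this decision can be probed from a test harness with a few lines of C (a sketch with minimal error handling; the helper name is illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool unpriv_bpf_disabled(void)
{
	char buf[2] = {};
	bool disabled = false;
	FILE *f = fopen("/proc/sys/kernel/unprivileged_bpf_disabled", "r");

	if (!f)
		return false; /* sysctl absent: unprivileged BPF is allowed */
	if (fgets(buf, sizeof(buf), f))
		disabled = buf[0] != '0'; /* values 1 and 2 both disable it */
	fclose(f);
	return disabled;
}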
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20221116015456.2461135-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7a99a6728169..f9034ea00bc9 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -8,6 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_SYSCALL=y +CONFIG_BPF_UNPRIV_DEFAULT_OFF=n CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y -- cgit v1.2.3-58-ga151 From c453e64cbc9532c0c2edfa999c35d29dad16b8bb Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 15 Nov 2022 11:29:40 +0800 Subject: selftests/bpf: fix memory leak of lsm_cgroup kmemleak reports this issue: unreferenced object 0xffff88810b7835c0 (size 32): comm "test_progs", pid 270, jiffies 4294969007 (age 1621.315s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 03 00 00 00 03 00 00 00 0f 00 00 00 00 00 00 00 ................ backtrace: [<00000000376cdeab>] kmalloc_trace+0x27/0x110 [<000000003bcdb3b6>] selinux_sk_alloc_security+0x66/0x110 [<000000003959008f>] security_sk_alloc+0x47/0x80 [<00000000e7bc6668>] sk_prot_alloc+0xbd/0x1a0 [<0000000002d6343a>] sk_alloc+0x3b/0x940 [<000000009812a46d>] unix_create1+0x8f/0x3d0 [<000000005ed0976b>] unix_create+0xa1/0x150 [<0000000086a1d27f>] __sock_create+0x233/0x4a0 [<00000000cffe3a73>] __sys_socket_create.part.0+0xaa/0x110 [<0000000007c63f20>] __sys_socket+0x49/0xf0 [<00000000b08753c8>] __x64_sys_socket+0x42/0x50 [<00000000b56e26b3>] do_syscall_64+0x3b/0x90 [<000000009b4871b8>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue occurs in the following scenarios: unix_create1() sk_alloc() sk_prot_alloc() security_sk_alloc() call_int_hook() hlist_for_each_entry() entry1->hook.sk_alloc_security <-- selinux_sk_alloc_security() succeeded, <-- sk->security alloced here. entry2->hook.sk_alloc_security <-- bpf_lsm_sk_alloc_security() failed goto out_free; ... <-- the sk->security not freed, memleak The core problem is that the LSM is not yet fully stacked (work is actively going on in this space) which means that some LSM hooks do not support multiple LSMs at the same time. To fix, skip the "EPERM" test when it runs in the environments that already have non-bpf lsms installed Fixes: dca85aac8895 ("selftests/bpf: lsm_cgroup functional test") Signed-off-by: Wang Yufen Cc: Stanislav Fomichev Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/1668482980-16163-1-git-send-email-wangyufen@huawei.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c | 17 +++++++++++++---- tools/testing/selftests/bpf/progs/lsm_cgroup.c | 8 ++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 1102e4f42d2d..f117bfef68a1 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -173,10 +173,12 @@ static void test_lsm_cgroup_functional(void) ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); - /* AF_UNIX is prohibited. 
*/ - fd = socket(AF_UNIX, SOCK_STREAM, 0); ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK)) + /* AF_UNIX is prohibited. */ + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); close(fd); /* AF_INET6 gets default policy (sk_priority). */ @@ -233,11 +235,18 @@ static void test_lsm_cgroup_functional(void) /* AF_INET6+SOCK_STREAM * AF_PACKET+SOCK_RAW + * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed * listen_fd * client_fd * accepted_fd */ - ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + if (skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK) + /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2"); + else + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); /* start_server * bind(ETH_P_ALL) diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c index 4f2d60b87b75..02c11d16b692 100644 --- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -7,6 +7,10 @@ char _license[] SEC("license") = "GPL"; +extern bool CONFIG_SECURITY_SELINUX __kconfig __weak; +extern bool CONFIG_SECURITY_SMACK __kconfig __weak; +extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak; + #ifndef AF_PACKET #define AF_PACKET 17 #endif @@ -140,6 +144,10 @@ SEC("lsm_cgroup/sk_alloc_security") int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) { called_socket_alloc++; + /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */ + if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR) + return 1; + if (family == AF_UNIX) return 0; /* EPERM */ -- cgit v1.2.3-58-ga151 From c4525f05ca3ca2795fc626252fb1c1cbb310111b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 15 Nov 2022 19:20:50 +0100 Subject: selftests/bpf: Explicitly pass RESOLVE_BTFIDS to sub-make MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cross-compiling selftests/bpf, the resolve_btfids binary ends up in a different directory than the regular resolve_btfids builds. Populate RESOLVE_BTFIDS for the sub-make, so it can find the binary.
Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20221115182051.582962-1-bjorn@kernel.org --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f3cd17026ee5..aa4e29315f4a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -201,7 +201,7 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_testmod + $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool -- cgit v1.2.3-58-ga151 From 98b2afc8a67f651ed01fc7d5a7e2528e63dd4e08 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 15 Nov 2022 19:20:51 +0100 Subject: selftests/bpf: Pass target triple to get_sys_includes macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cross-compiling [1], the get_sys_includes make macro should use the target system include path, and not the build hosts system include path. Make clang honor the CROSS_COMPILE triple. [1] e.g. "ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- make" Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Tested-by: Anders Roxell Link: https://lore.kernel.org/bpf/20221115182051.582962-2-bjorn@kernel.org --- tools/testing/selftests/bpf/Makefile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index aa4e29315f4a..6a0f043dc410 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -310,9 +310,9 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. define get_sys_includes -$(shell $(1) -v -E - &1 \ +$(shell $(1) $(2) -v -E - &1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - Date: Fri, 18 Nov 2022 07:26:01 +0530 Subject: bpf: Rewrite kfunc argument handling As we continue to add more features, argument types, kfunc flags, and different extensions to kfuncs, the code to verify the correctness of the kfunc prototype wrt the passed in registers has become ad-hoc and ugly to read. To make life easier, and make a very clear split between different stages of argument processing, move all the code into verifier.c and refactor into easier to read helpers and functions. This also makes sharing code within the verifier easier with kfunc argument processing. This will be more and more useful in later patches as we are now moving to implement very core BPF helpers as kfuncs, to keep them experimental before baking into UAPI. Remove all kfunc related bits now from btf_check_func_arg_match, as users have been converted away to refactored kfunc argument handling. 
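As a concrete example of one convention this code enforces, a kfunc can pair a memory argument with its size via the "__sz" suffix in the parameter name, which is what the is_kfunc_arg_mem_size() check (moved from btf.c to verifier.c by this patch, visible in the removed lines below) recognizes. The kfunc itself is hypothetical, shown only to illustrate the naming rule:

/* Hypothetical in-kernel kfunc: the verifier pairs 'data' with
 * 'data__sz' purely by the "__sz" argument-name suffix, so a BPF
 * caller must pass a pointer to at least data__sz readable bytes.
 */
noinline int bpf_dump_mem(void *data, u32 data__sz)
{
	return data && data__sz ? 0 : -EINVAL;
}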
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-12-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 - include/linux/bpf_verifier.h | 2 - include/linux/btf.h | 31 +- kernel/bpf/btf.c | 380 +------------- kernel/bpf/verifier.c | 545 ++++++++++++++++++++- .../selftests/bpf/prog_tests/kfunc_dynptr_param.c | 2 +- tools/testing/selftests/bpf/verifier/calls.c | 2 +- .../testing/selftests/bpf/verifier/ref_tracking.c | 4 +- 8 files changed, 573 insertions(+), 404 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 323985a39ece..0a74df731eb8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2109,22 +2109,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); -struct bpf_kfunc_arg_meta { - u64 r0_size; - bool r0_rdonly; - int ref_obj_id; - u32 flags; -}; - struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); -int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1db2b4dc7009..fb146b0ce006 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -603,8 +603,6 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, enum bpf_arg_type arg_type); -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, diff --git a/include/linux/btf.h b/include/linux/btf.h index 42d8f3730a8d..d5b26380a60f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -338,6 +338,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static inline bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + +static inline bool btf_type_is_array(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -439,9 +449,10 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } -#ifdef CONFIG_BPF_SYSCALL struct bpf_prog; +struct bpf_verifier_log; +#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); @@ -455,6 +466,12 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +const struct btf_member * 
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -490,6 +507,18 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } +static inline const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) +{ + return NULL; +} +static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) +{ + return false; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 91aa9c96621f..4dcda4ae48c1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -478,16 +478,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t) return !t || btf_type_nosize(t); } -static bool __btf_type_is_struct(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; -} - -static bool btf_type_is_array(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; -} - static bool btf_type_is_datasec(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; @@ -5536,7 +5526,7 @@ static u8 bpf_ctx_convert_map[] = { #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE -static const struct btf_member * +const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) @@ -6322,8 +6312,8 @@ int btf_struct_access(struct bpf_verifier_log *log, * end up with two different module BTFs, but IDs point to the common type in * vmlinux BTF. 
*/ -static bool btf_types_are_same(const struct btf *btf1, u32 id1, - const struct btf *btf2, u32 id2) +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) { if (id1 != id2) return false; @@ -6605,122 +6595,19 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr return btf_check_func_type_match(log, btf1, t1, btf2, t2); } -static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { -#ifdef CONFIG_NET - [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], - [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], - [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], -#endif -}; - -/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ -static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int rec) -{ - const struct btf_type *member_type; - const struct btf_member *member; - u32 i; - - if (!btf_type_is_struct(t)) - return false; - - for_each_member(i, t, member) { - const struct btf_array *array; - - member_type = btf_type_skip_modifiers(btf, member->type, NULL); - if (btf_type_is_struct(member_type)) { - if (rec >= 3) { - bpf_log(log, "max struct nesting depth exceeded\n"); - return false; - } - if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1)) - return false; - continue; - } - if (btf_type_is_array(member_type)) { - array = btf_type_array(member_type); - if (!array->nelems) - return false; - member_type = btf_type_skip_modifiers(btf, array->type, NULL); - if (!btf_type_is_scalar(member_type)) - return false; - continue; - } - if (!btf_type_is_scalar(member_type)) - return false; - } - return true; -} - -static bool is_kfunc_arg_mem_size(const struct btf *btf, - const struct btf_param *arg, - const struct bpf_reg_state *reg) -{ - int len, sfx_len = sizeof("__sz") - 1; - const struct btf_type *t; - const char *param_name; - - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) - return false; - - /* In the future, this can be ported to use BTF tagging */ - param_name = btf_name_by_offset(btf, arg->name_off); - if (str_is_empty(param_name)) - return false; - len = strlen(param_name); - if (len < sfx_len) - return false; - param_name += len - sfx_len; - if (strncmp(param_name, "__sz", sfx_len)) - return false; - - return true; -} - -static bool btf_is_kfunc_arg_mem_size(const struct btf *btf, - const struct btf_param *arg, - const struct bpf_reg_state *reg, - const char *name) -{ - int len, target_len = strlen(name); - const struct btf_type *t; - const char *param_name; - - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) - return false; - - param_name = btf_name_by_offset(btf, arg->name_off); - if (str_is_empty(param_name)) - return false; - len = strlen(param_name); - if (len != target_len) - return false; - if (strcmp(param_name, name)) - return false; - - return true; -} - static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok, - struct bpf_kfunc_arg_meta *kfunc_meta, bool processing_call) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); - bool rel = false, kptr_get = false, trusted_args = false; - bool sleepable = false; struct bpf_verifier_log *log = &env->log; - u32 i, nargs, ref_id, ref_obj_id = 0; - bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct 
btf_type *t, *ref_t; const struct btf_param *args; - int ref_regno = 0, ret; + u32 i, nargs, ref_id; + int ret; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -6746,14 +6633,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } - if (is_kfunc && kfunc_meta) { - /* Only kfunc can be release func */ - rel = kfunc_meta->flags & KF_RELEASE; - kptr_get = kfunc_meta->flags & KF_KPTR_GET; - trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS; - sleepable = kfunc_meta->flags & KF_SLEEPABLE; - } - /* check that BTF function arguments match actual types that the * verifier sees. */ @@ -6761,42 +6640,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; - bool obj_ptr = false; t = btf_type_skip_modifiers(btf, args[i].type, NULL); if (btf_type_is_scalar(t)) { - if (is_kfunc && kfunc_meta) { - bool is_buf_size = false; - - /* check for any const scalar parameter of name "rdonly_buf_size" - * or "rdwr_buf_size" - */ - if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg, - "rdonly_buf_size")) { - kfunc_meta->r0_rdonly = true; - is_buf_size = true; - } else if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg, - "rdwr_buf_size")) - is_buf_size = true; - - if (is_buf_size) { - if (kfunc_meta->r0_size) { - bpf_log(log, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); - return -EINVAL; - } - - if (!tnum_is_const(reg->var_off)) { - bpf_log(log, "R%d is not a const\n", regno); - return -EINVAL; - } - - kfunc_meta->r0_size = reg->var_off.value; - ret = mark_chain_precision(env, regno); - if (ret) - return ret; - } - } - if (reg->type == SCALAR_VALUE) continue; bpf_log(log, "R%d is not a scalar\n", regno); @@ -6809,88 +6655,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } - /* These register types have special constraints wrt ref_obj_id - * and offset checks. The rest of trusted args don't. - */ - obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID || - reg2btf_ids[base_type(reg->type)]; - - /* Check if argument must be a referenced pointer, args + i has - * been verified to be a pointer (after skipping modifiers). - * PTR_TO_CTX is ok without having non-zero ref_obj_id. - */ - if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) { - bpf_log(log, "R%d must be referenced\n", regno); - return -EINVAL; - } - ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - /* Trusted args have the same offset checks as release arguments */ - if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id)) - arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) return ret; - if (is_kfunc && reg->ref_obj_id) { - /* Ensure only one argument is referenced PTR_TO_BTF_ID */ - if (ref_obj_id) { - bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", - regno, reg->ref_obj_id, ref_obj_id); - return -EFAULT; - } - ref_regno = regno; - ref_obj_id = reg->ref_obj_id; - } - - /* kptr_get is only true for kfunc */ - if (i == 0 && kptr_get) { - struct btf_field *kptr_field; - - if (reg->type != PTR_TO_MAP_VALUE) { - bpf_log(log, "arg#0 expected pointer to map value\n"); - return -EINVAL; - } - - /* check_func_arg_reg_off allows var_off for - * PTR_TO_MAP_VALUE, but we need fixed offset to find - * off_desc. 
- */ - if (!tnum_is_const(reg->var_off)) { - bpf_log(log, "arg#0 must have constant offset\n"); - return -EINVAL; - } - - kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR); - if (!kptr_field || kptr_field->type != BPF_KPTR_REF) { - bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n", - reg->off + reg->var_off.value); - return -EINVAL; - } - - if (!btf_type_is_ptr(ref_t)) { - bpf_log(log, "arg#0 BTF type must be a double pointer\n"); - return -EINVAL; - } - - ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id); - ref_tname = btf_name_by_offset(btf, ref_t->name_off); - - if (!btf_type_is_struct(ref_t)) { - bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", - func_name, i, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - if (!btf_struct_ids_match(log, btf, ref_id, 0, kptr_field->kptr.btf, - kptr_field->kptr.btf_id, true)) { - bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n", - func_name, i, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - /* rest of the arguments can be anything, like normal kfunc */ - } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. */ @@ -6900,109 +6672,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, i, btf_type_str(t)); return -EINVAL; } - } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || - (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) { - const struct btf_type *reg_ref_t; - const struct btf *reg_btf; - const char *reg_ref_tname; - u32 reg_ref_id; - - if (!btf_type_is_struct(ref_t)) { - bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", - func_name, i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - if (reg->type == PTR_TO_BTF_ID) { - reg_btf = reg->btf; - reg_ref_id = reg->btf_id; - } else { - reg_btf = btf_vmlinux; - reg_ref_id = *reg2btf_ids[base_type(reg->type)]; - } - - reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, - ®_ref_id); - reg_ref_tname = btf_name_by_offset(reg_btf, - reg_ref_t->name_off); - if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, - reg->off, btf, ref_id, - trusted_args || (rel && reg->ref_obj_id))) { - bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", - func_name, i, - btf_type_str(ref_t), ref_tname, - regno, btf_type_str(reg_ref_t), - reg_ref_tname); - return -EINVAL; - } } else if (ptr_to_mem_ok && processing_call) { const struct btf_type *resolve_ret; u32 type_size; - if (is_kfunc) { - bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], ®s[regno + 1]); - bool arg_dynptr = btf_type_is_struct(ref_t) && - !strcmp(ref_tname, - stringify_struct(bpf_dynptr_kern)); - - /* Permit pointer to mem, but only when argument - * type is pointer to scalar, or struct composed - * (recursively) of scalars. - * When arg_mem_size is true, the pointer can be - * void *. - * Also permit initialized local dynamic pointers. - */ - if (!btf_type_is_scalar(ref_t) && - !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && - !arg_dynptr && - (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { - bpf_log(log, - "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", - i, btf_type_str(ref_t), ref_tname, arg_mem_size ? 
"void, " : ""); - return -EINVAL; - } - - if (arg_dynptr) { - if (reg->type != PTR_TO_STACK) { - bpf_log(log, "arg#%d pointer type %s %s not to stack\n", - i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - if (!is_dynptr_reg_valid_init(env, reg)) { - bpf_log(log, - "arg#%d pointer type %s %s must be valid and initialized\n", - i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - if (!is_dynptr_type_expected(env, reg, - ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { - bpf_log(log, - "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", - i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - continue; - } - - /* Check for mem, len pair */ - if (arg_mem_size) { - if (check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1)) { - bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", - i, i + 1); - return -EINVAL; - } - i++; - continue; - } - } - resolve_ret = btf_resolve_size(btf, ref_t, &type_size); if (IS_ERR(resolve_ret)) { bpf_log(log, @@ -7015,36 +6688,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (check_mem_reg(env, reg, regno, type_size)) return -EINVAL; } else { - bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i, - is_kfunc ? "kernel " : "", func_name, func_id); + bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i, + func_name, func_id); return -EINVAL; } } - /* Either both are set, or neither */ - WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno)); - /* We already made sure ref_obj_id is set only for one argument. We do - * allow (!rel && ref_obj_id), so that passing such referenced - * PTR_TO_BTF_ID to other kfuncs works. Note that rel is only true when - * is_kfunc is true. - */ - if (rel && !ref_obj_id) { - bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", - func_name); - return -EINVAL; - } - - if (sleepable && !env->prog->aux->sleepable) { - bpf_log(log, "kernel function %s is sleepable but the program is not\n", - func_name); - return -EINVAL; - } - - if (kfunc_meta && ref_obj_id) - kfunc_meta->ref_obj_id = ref_obj_id; - - /* returns argument register number > 0 in case of reference release kfunc */ - return rel ? ref_regno : 0; + return 0; } /* Compare BTF of a function declaration with given bpf_reg_state. @@ -7074,7 +6724,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, false); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. @@ -7117,7 +6767,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, true); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. 
@@ -7128,14 +6778,6 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, return err; } -int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - struct bpf_kfunc_arg_meta *meta) -{ - return btf_check_func_arg_match(env, btf, func_id, regs, true, meta, true); -} - /* Convert BTF of a function into bpf_reg_state if possible * Returns: * EFAULT - there is a verifier bug. Abort verification. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c8f3abe9b08e..7d7a91c54709 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5550,8 +5550,8 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return err; } -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno) +static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) { struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; bool may_be_null = type_may_be_null(mem_reg->type); @@ -7863,19 +7863,517 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } +struct bpf_kfunc_call_arg_meta { + /* In parameters */ + struct btf *btf; + u32 func_id; + u32 kfunc_flags; + const struct btf_type *func_proto; + const char *func_name; + /* Out parameters */ + u32 ref_obj_id; + u8 release_regno; + bool r0_rdonly; + u64 r0_size; +}; + +static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ACQUIRE; +} + +static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RET_NULL; +} + +static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RELEASE; +} + +static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_TRUSTED_ARGS; +} + +static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_SLEEPABLE; +} + +static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_DESTRUCTIVE; +} + +static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg) +{ + return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET); +} + +static bool is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + int len, sfx_len = sizeof("__sz") - 1; + const struct btf_type *t; + const char *param_name; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len < sfx_len) + return false; + param_name += len - sfx_len; + if (strncmp(param_name, "__sz", sfx_len)) + return false; + + return true; +} + +static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, + const struct btf_param *arg, + const char *name) +{ + int len, target_len = strlen(name); + const char *param_name; + + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len != target_len) + return false; + if (strcmp(param_name, name)) + return false; + + return true; +} + +enum { + KF_ARG_DYNPTR_ID, +}; + +BTF_ID_LIST(kf_arg_btf_ids) +BTF_ID(struct, bpf_dynptr_kern) + +static bool 
is_kfunc_arg_dynptr(const struct btf *btf, + const struct btf_param *arg) +{ + const struct btf_type *t; + u32 res_id; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t) + return false; + if (!btf_type_is_ptr(t)) + return false; + t = btf_type_skip_modifiers(btf, t->type, &res_id); + if (!t) + return false; + return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[KF_ARG_DYNPTR_ID]); +} + +/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ +static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env, + const struct btf *btf, + const struct btf_type *t, int rec) +{ + const struct btf_type *member_type; + const struct btf_member *member; + u32 i; + + if (!btf_type_is_struct(t)) + return false; + + for_each_member(i, t, member) { + const struct btf_array *array; + + member_type = btf_type_skip_modifiers(btf, member->type, NULL); + if (btf_type_is_struct(member_type)) { + if (rec >= 3) { + verbose(env, "max struct nesting depth exceeded\n"); + return false; + } + if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1)) + return false; + continue; + } + if (btf_type_is_array(member_type)) { + array = btf_array(member_type); + if (!array->nelems) + return false; + member_type = btf_type_skip_modifiers(btf, array->type, NULL); + if (!btf_type_is_scalar(member_type)) + return false; + continue; + } + if (!btf_type_is_scalar(member_type)) + return false; + } + return true; +} + + +static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { +#ifdef CONFIG_NET + [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], + [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], + [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], +#endif +}; + +enum kfunc_ptr_arg_type { + KF_ARG_PTR_TO_CTX, + KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ + KF_ARG_PTR_TO_DYNPTR, + KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ + KF_ARG_PTR_TO_MEM, + KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ +}; + +static enum kfunc_ptr_arg_type +get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, + struct bpf_kfunc_call_arg_meta *meta, + const struct btf_type *t, const struct btf_type *ref_t, + const char *ref_tname, const struct btf_param *args, + int argno, int nargs) +{ + u32 regno = argno + 1; + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + bool arg_mem_size = false; + + /* In this function, we verify the kfunc's BTF as per the argument type, + * leaving the rest of the verification with respect to the register + * type to our caller. When a set of conditions hold in the BTF type of + * arguments, we resolve it to a known kfunc_ptr_arg_type. 
+ */ + if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) + return KF_ARG_PTR_TO_CTX; + + if (is_kfunc_arg_kptr_get(meta, argno)) { + if (!btf_type_is_ptr(ref_t)) { + verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n"); + return -EINVAL; + } + ref_t = btf_type_by_id(meta->btf, ref_t->type); + ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off); + if (!btf_type_is_struct(ref_t)) { + verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n", + meta->func_name, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + return KF_ARG_PTR_TO_KPTR; + } + + if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_DYNPTR; + + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { + if (!btf_type_is_struct(ref_t)) { + verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", + meta->func_name, argno, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + return KF_ARG_PTR_TO_BTF_ID; + } + + if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1])) + arg_mem_size = true; + + /* This is the catch all argument type of register types supported by + * check_helper_mem_access. However, we only allow when argument type is + * pointer to scalar, or struct composed (recursively) of scalars. When + * arg_mem_size is true, the pointer can be void *. + */ + if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) && + (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { + verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", + argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); + return -EINVAL; + } + return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM; +} + +static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + const struct btf_type *ref_t, + const char *ref_tname, u32 ref_id, + struct bpf_kfunc_call_arg_meta *meta, + int argno) +{ + const struct btf_type *reg_ref_t; + bool strict_type_match = false; + const struct btf *reg_btf; + const char *reg_ref_tname; + u32 reg_ref_id; + + if (reg->type == PTR_TO_BTF_ID) { + reg_btf = reg->btf; + reg_ref_id = reg->btf_id; + } else { + reg_btf = btf_vmlinux; + reg_ref_id = *reg2btf_ids[base_type(reg->type)]; + } + + if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id)) + strict_type_match = true; + + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); + reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); + if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { + verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", + meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, + btf_type_str(reg_ref_t), reg_ref_tname); + return -EINVAL; + } + return 0; +} + +static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + const struct btf_type *ref_t, + const char *ref_tname, + struct bpf_kfunc_call_arg_meta *meta, + int argno) +{ + struct btf_field *kptr_field; + + /* check_func_arg_reg_off allows var_off for + * PTR_TO_MAP_VALUE, but we need fixed offset to find + * off_desc. 
+ */ + if (!tnum_is_const(reg->var_off)) { + verbose(env, "arg#0 must have constant offset\n"); + return -EINVAL; + } + + kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR); + if (!kptr_field || kptr_field->type != BPF_KPTR_REF) { + verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n", + reg->off + reg->var_off.value); + return -EINVAL; + } + + if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf, + kptr_field->kptr.btf_id, true)) { + verbose(env, "kernel function %s args#%d expected pointer to %s %s\n", + meta->func_name, argno, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + return 0; +} + +static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta) +{ + const char *func_name = meta->func_name, *ref_tname; + const struct btf *btf = meta->btf; + const struct btf_param *args; + u32 i, nargs; + int ret; + + args = (const struct btf_param *)(meta->func_proto + 1); + nargs = btf_type_vlen(meta->func_proto); + if (nargs > MAX_BPF_FUNC_REG_ARGS) { + verbose(env, "Function %s has %d > %d args\n", func_name, nargs, + MAX_BPF_FUNC_REG_ARGS); + return -EINVAL; + } + + /* Check that BTF function arguments match actual types that the + * verifier sees. + */ + for (i = 0; i < nargs; i++) { + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[i + 1]; + const struct btf_type *t, *ref_t, *resolve_ret; + enum bpf_arg_type arg_type = ARG_DONTCARE; + u32 regno = i + 1, ref_id, type_size; + bool is_ret_buf_sz = false; + int kf_arg_type; + + t = btf_type_skip_modifiers(btf, args[i].type, NULL); + if (btf_type_is_scalar(t)) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "R%d is not a scalar\n", regno); + return -EINVAL; + } + if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) { + meta->r0_rdonly = true; + is_ret_buf_sz = true; + } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) { + is_ret_buf_sz = true; + } + + if (is_ret_buf_sz) { + if (meta->r0_size) { + verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); + return -EINVAL; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a const\n", regno); + return -EINVAL; + } + + meta->r0_size = reg->var_off.value; + ret = mark_chain_precision(env, regno); + if (ret) + return ret; + } + continue; + } + + if (!btf_type_is_ptr(t)) { + verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t)); + return -EINVAL; + } + + if (reg->ref_obj_id) { + if (is_kfunc_release(meta) && meta->ref_obj_id) { + verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, + meta->ref_obj_id); + return -EFAULT; + } + meta->ref_obj_id = reg->ref_obj_id; + if (is_kfunc_release(meta)) + meta->release_regno = regno; + } + + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + + kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs); + if (kf_arg_type < 0) + return kf_arg_type; + + switch (kf_arg_type) { + case KF_ARG_PTR_TO_BTF_ID: + if (!is_kfunc_trusted_args(meta)) + break; + if (!reg->ref_obj_id) { + verbose(env, "R%d must be referenced\n", regno); + return -EINVAL; + } + fallthrough; + case KF_ARG_PTR_TO_CTX: + /* Trusted arguments have the same offset checks as release arguments */ + arg_type |= OBJ_RELEASE; + break; + case KF_ARG_PTR_TO_KPTR: + case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_MEM: + case KF_ARG_PTR_TO_MEM_SIZE: 
+ /* Trusted by default */ + break; + default: + WARN_ON_ONCE(1); + return -EFAULT; + } + + if (is_kfunc_release(meta) && reg->ref_obj_id) + arg_type |= OBJ_RELEASE; + ret = check_func_arg_reg_off(env, reg, regno, arg_type); + if (ret < 0) + return ret; + + switch (kf_arg_type) { + case KF_ARG_PTR_TO_CTX: + if (reg->type != PTR_TO_CTX) { + verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); + return -EINVAL; + } + break; + case KF_ARG_PTR_TO_KPTR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#0 expected pointer to map value\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_DYNPTR: + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d expected pointer to stack\n", i); + return -EINVAL; + } + + if (!is_dynptr_reg_valid_init(env, reg)) { + verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + + if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { + verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + break; + case KF_ARG_PTR_TO_BTF_ID: + /* Only base_type is checked, further checks are done here */ + if (reg->type != PTR_TO_BTF_ID && + (!reg2btf_ids[base_type(reg->type)] || type_flag(reg->type))) { + verbose(env, "arg#%d expected pointer to btf or socket\n", i); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_MEM: + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); + if (IS_ERR(resolve_ret)) { + verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", + i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret)); + return -EINVAL; + } + ret = check_mem_reg(env, reg, regno, type_size); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_MEM_SIZE: + ret = check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1); + if (ret < 0) { + verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); + return ret; + } + /* Skip next '__sz' argument */ + i++; + break; + } + } + + if (is_kfunc_release(meta) && !meta->release_regno) { + verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", + func_name); + return -EINVAL; + } + + return 0; +} + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); - struct bpf_kfunc_arg_meta meta = { 0 }; const char *func_name, *ptr_type_name; + struct bpf_kfunc_call_arg_meta meta; u32 i, nargs, func_id, ptr_type_id; int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; u32 *kfunc_flags; - bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) @@ -7896,24 +8394,34 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name); return -EACCES; } - if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) { - verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n"); + + /* Prepare kfunc call metadata */ + memset(&meta, 0, sizeof(meta)); + meta.btf = desc_btf; + meta.func_id = func_id; + meta.kfunc_flags = *kfunc_flags; + meta.func_proto = func_proto; 
+ meta.func_name = func_name; + + if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { + verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); return -EACCES; } - acq = *kfunc_flags & KF_ACQUIRE; - - meta.flags = *kfunc_flags; + if (is_kfunc_sleepable(&meta) && !env->prog->aux->sleepable) { + verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); + return -EACCES; + } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, &meta); + err = check_kfunc_args(env, &meta); if (err < 0) return err; /* In case of release function, we get register number of refcounted - * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ - if (err) { - err = release_reference(env, regs[err].ref_obj_id); + if (meta.release_regno) { + err = release_reference(env, regs[meta.release_regno].ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", func_name, func_id); @@ -7927,7 +8435,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); - if (acq && !btf_type_is_struct_ptr(desc_btf, t)) { + if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); return -EINVAL; } @@ -7966,20 +8474,23 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; } - if (*kfunc_flags & KF_RET_NULL) { + if (is_kfunc_ret_null(&meta)) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ regs[BPF_REG_0].id = ++env->id_gen; } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); - if (acq) { + if (is_kfunc_acquire(&meta)) { int id = acquire_reference_state(env, insn_idx); if (id < 0) return id; - regs[BPF_REG_0].id = id; + if (is_kfunc_ret_null(&meta)) + regs[BPF_REG_0].id = id; regs[BPF_REG_0].ref_obj_id = id; } + if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) + regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index c210657d4d0a..55d641c1f126 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -22,7 +22,7 @@ static struct { "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0}, {"not_valid_dynptr", "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0}, - {"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0}, + {"not_ptr_to_stack", "arg#0 expected pointer to stack", 0}, {"dynptr_data_null", NULL, -EBADMSG}, }; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index e1a937277b54..86d6fef2e3b4 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -109,7 +109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point", + .errstr = "arg#0 expected pointer to btf or socket", .fixup_kfunc_btf_id = { 
{ "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index fd683a32a276..55cba01c99d5 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -142,7 +142,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 expected pointer to btf or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_user_key", 2 }, { "bpf_key_put", 4 }, @@ -163,7 +163,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 expected pointer to btf or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_system_key", 1 }, { "bpf_key_put", 3 }, -- cgit v1.2.3-58-ga151 From 958cf2e273f0929c66169e0788031310e8118722 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:03 +0530 Subject: bpf: Introduce bpf_obj_new Introduce the type-safe memory allocator bpf_obj_new for BPF programs. The kernel side kfunc is named bpf_obj_new_impl, as passing hidden arguments to kfuncs still requires having them in the prototype, unlike BPF helpers which always take 5 arguments and have them checked using bpf_func_proto in the verifier, ignoring unset argument types. Introduce the __ign suffix to ignore a specific kfunc argument during type checks, then use this to introduce support for passing type metadata to the bpf_obj_new_impl kfunc. The user passes the BTF ID of the type it wants to allocate in program BTF, the verifier then rewrites the first argument as the size of this type, after performing some sanity checks (to ensure it exists and it is a struct type). The second argument is also fixed up and passed by the verifier. This is the btf_struct_meta for the type being allocated. It would be needed mostly for the offset array which is required for zero initializing special fields while leaving the rest of storage in uninitialized state. It would also be needed in the next patch to perform proper destruction of the object's special fields. Under the hood, bpf_obj_new will call bpf_mem_alloc and bpf_mem_free, using the any-context BPF memory allocator introduced recently. To this end, a global instance of the BPF memory allocator is initialized on boot to be used for this purpose. This 'bpf_global_ma' serves all allocations for bpf_obj_new. In the future, bpf_obj_new variants will allow specifying a custom allocator. Note that now that bpf_obj_new can be used to allocate objects that can be linked into a BPF linked list (when future linked list helpers are available), we need to also free the elements using bpf_mem_free. However, since the draining of elements is done outside the bpf_spin_lock, we need to do migrate_disable around the call since bpf_list_head_free can be called from the map free path where migration is enabled. Otherwise, when called from BPF programs, migration is already disabled. A convenience macro is included in the bpf_experimental.h header to hide the ugly details of the implementation, leading to user code looking similar to a language level extension which allocates and constructs fields of a user type.
struct bar { struct bpf_list_node node; }; struct foo { struct bpf_spin_lock lock; struct bpf_list_head head __contains(bar, node); }; void prog(void) { struct foo *f; f = bpf_obj_new(typeof(*f)); if (!f) return; ... } A key piece of this story is still missing, i.e. the free function, which will come in the next patch. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-14-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 21 +++-- include/linux/bpf_verifier.h | 2 + kernel/bpf/core.c | 16 ++++ kernel/bpf/helpers.c | 47 ++++++++++-- kernel/bpf/verifier.c | 102 +++++++++++++++++++++++-- tools/testing/selftests/bpf/bpf_experimental.h | 25 ++++++ 6 files changed, 190 insertions(+), 23 deletions(-) create mode 100644 tools/testing/selftests/bpf/bpf_experimental.h (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0a74df731eb8..8b32376ce746 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -54,6 +54,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; +extern struct bpf_mem_alloc bpf_global_ma; +extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, @@ -334,16 +336,19 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) { - if (!IS_ERR_OR_NULL(map->record)) { - struct btf_field *fields = map->record->fields; - u32 cnt = map->record->cnt; - int i; + int i; - for (i = 0; i < cnt; i++) - memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type)); - } + if (!foffs) + return; + for (i = 0; i < foffs->cnt; i++) + memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); +} + +static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +{ + bpf_obj_init(map->field_offs, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index fb146b0ce006..3dc72d396dfc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -433,6 +433,8 @@ struct bpf_insn_aux_data { */ struct bpf_loop_inline_state loop_inline_state; }; + u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9c16338bcbe8..2e57fc839a5c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -34,6 +34,7 @@ #include <linux/log2.h> #include <linux/bpf_verifier.h> #include <linux/nodemask.h> +#include <linux/bpf_mem_alloc.h> #include <asm/barrier.h> #include <asm/unaligned.h> @@ -60,6 +61,9 @@ #define CTX regs[BPF_REG_CTX] #define IMM insn->imm +struct bpf_mem_alloc bpf_global_ma; +bool bpf_global_ma_set; + /* No hurry in this branch * * Exported for the bpf jit load helper. 
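To make the bpf_obj_init() zeroing above concrete, a worked example with a hypothetical layout (offsets and sizes chosen for illustration only):

	/* struct foo {
	 *         int data;                  // offset 0, ordinary field
	 *         struct bpf_spin_lock lock; // offset 4, size 4
	 *         struct bpf_list_head head; // offset 8, size 16
	 * };
	 * btf_field_offs records only the special fields:
	 *         cnt = 2, field_off = { 4, 8 }, field_sz = { 4, 16 }
	 * so bpf_obj_init() zeroes bytes [4, 8) and [8, 24), while 'data'
	 * is left uninitialized, as the commit message above describes. */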
@@ -2746,6 +2750,18 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len) return -ENOTSUPP; } +#ifdef CONFIG_BPF_SYSCALL +static int __init bpf_global_ma_init(void) +{ + int ret; + + ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); + bpf_global_ma_set = !ret; + return ret; +} +late_initcall(bpf_global_ma_init); +#endif + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5bc0b9f0f306..c4f1c22cc44c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -19,6 +19,7 @@ #include <linux/proc_ns.h> #include <linux/security.h> #include <linux/btf_ids.h> +#include <linux/bpf_mem_alloc.h> #include "../../lib/kstrtox.h" @@ -1735,25 +1736,57 @@ unlock: obj -= field->list_head.node_offset; head = head->next; - /* TODO: Rework later */ - kfree(obj); + /* The contained type can also have resources, including a + * bpf_list_head which needs to be freed. + */ + bpf_obj_free_fields(field->list_head.value_rec, obj); + /* bpf_mem_free requires migrate_disable(), since we can be + * called from map free path as well apart from BPF program (as + * part of map ops doing bpf_obj_free_fields). + */ + migrate_disable(); + bpf_mem_free(&bpf_global_ma, obj); + migrate_enable(); } } -BTF_SET8_START(tracing_btf_ids) +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + u64 size = local_type_id__k; + void *p; + + if (unlikely(!bpf_global_ma_set)) + return NULL; + p = bpf_mem_alloc(&bpf_global_ma, size); + if (!p) + return NULL; + if (meta) + bpf_obj_init(meta->field_offs, p); + return p; +} + +__diag_pop(); + +BTF_SET8_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif -BTF_SET8_END(tracing_btf_ids) +BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_SET8_END(generic_btf_ids) -static const struct btf_kfunc_id_set tracing_kfunc_set = { +static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, - .set = &tracing_btf_ids, + .set = &generic_btf_ids, }; static int __init kfunc_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tracing_kfunc_set); + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); } late_initcall(kfunc_init); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c8fcc2808d99..804f3bca6c08 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7952,6 +7952,11 @@ static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param return __kfunc_param_match_suffix(btf, arg, "__k"); } +static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__ign"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -8214,6 +8219,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ int kf_arg_type; t = btf_type_skip_modifiers(btf, args[i].type, NULL); + + if (is_kfunc_arg_ignore(btf, &args[i])) + continue; + if (btf_type_is_scalar(t)) { if (reg->type != SCALAR_VALUE) { verbose(env, "R%d is not a scalar\n", regno); @@ -8391,6 +8400,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return 0; } +enum special_kfunc_type { + KF_bpf_obj_new_impl, +}; + +BTF_SET_START(special_kfunc_set) +BTF_ID(func, bpf_obj_new_impl) 
+BTF_SET_END(special_kfunc_set) + +BTF_ID_LIST(special_kfunc_list) +BTF_ID(func, bpf_obj_new_impl) + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -8465,17 +8485,59 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { - verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); - return -EINVAL; + /* Only exception is bpf_obj_new_impl */ + if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } } if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(desc_btf, t->type, - &ptr_type_id); - if (!btf_type_is_struct(ptr_type)) { + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); + + if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { + if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + const struct btf_type *ret_t; + struct btf *ret_btf; + u32 ret_btf_id; + + if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { + verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); + return -EINVAL; + } + + ret_btf = env->prog->aux->btf; + ret_btf_id = meta.arg_constant.value; + + /* This may be NULL due to user not supplying a BTF */ + if (!ret_btf) { + verbose(env, "bpf_obj_new requires prog BTF\n"); + return -EINVAL; + } + + ret_t = btf_type_by_id(ret_btf, ret_btf_id); + if (!ret_t || !__btf_type_is_struct(ret_t)) { + verbose(env, "bpf_obj_new type ID argument must be of a struct\n"); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = ret_btf; + regs[BPF_REG_0].btf_id = ret_btf_id; + + env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; + env->insn_aux_data[insn_idx].kptr_struct_meta = + btf_find_struct_meta(ret_btf, ret_btf_id); + } else { + verbose(env, "kernel function %s unhandled dynamic return type\n", + meta.func_name); + return -EFAULT; + } + } else if (!__btf_type_is_struct(ptr_type)) { if (!meta.r0_size) { ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); @@ -8503,6 +8565,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; } + if (is_kfunc_ret_null(&meta)) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ @@ -14671,8 +14734,8 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } -static int fixup_kfunc_call(struct bpf_verifier_env *env, - struct bpf_insn *insn) +static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + struct bpf_insn *insn_buf, int insn_idx, int *cnt) { const struct bpf_kfunc_desc *desc; @@ -14691,8 +14754,21 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, return -EFAULT; } + *cnt = 0; insn->imm = desc->imm; + if (insn->off) + return 0; + if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) 
}; + u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size; + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size); + insn_buf[1] = addr[0]; + insn_buf[2] = addr[1]; + insn_buf[3] = *insn; + *cnt = 4; + } return 0; } @@ -14834,9 +14910,19 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) continue; if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { - ret = fixup_kfunc_call(env, insn); + ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); if (ret) return ret; + if (cnt == 0) + continue; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; continue; } diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h new file mode 100644 index 000000000000..aeb6a7fcb7c4 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -0,0 +1,25 @@ +#ifndef __BPF_EXPERIMENTAL__ +#define __BPF_EXPERIMENTAL__ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +/* Description + * Allocates an object of the type represented by 'local_type_id' in + * program BTF. User may use the bpf_core_type_id_local macro to pass the + * type ID of a struct in program BTF. + * + * The 'local_type_id' parameter must be a known constant. + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * A pointer to an object of the type corresponding to the passed in + * 'local_type_id', or NULL on failure. + */ +extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_new_impl */ +#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) + +#endif -- cgit v1.2.3-58-ga151 From ac9f06050a3580cf4076a57a470cd71f12a81171 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:04 +0530 Subject: bpf: Introduce bpf_obj_drop Introduce bpf_obj_drop, which is the kfunc used to free allocated objects (allocated using bpf_obj_new). Pairing with bpf_obj_new, it implicitly destructs the fields of the object automatically, without user intervention. Just like the previous patch, the btf_struct_meta that is needed to free up the special fields is passed as a hidden argument to the kfunc. For the user, a convenience macro hides the kernel side kfunc, which is named bpf_obj_drop_impl. 
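Taken together with bpf_obj_new_impl from the previous patch, the kernel-side prototypes rely on parameter-name suffixes, not types, to drive the verifier's special handling; a summary sketch (prototypes as in the diffs, annotations are an editorial reading of the suffix checks):

	void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign);
	void bpf_obj_drop_impl(void *p__alloc, void *meta__ign);

	/* __k:     must be a known scalar constant (precision-tracked)
	 * __ign:   skipped entirely during argument checks
	 * __alloc: must be a referenced PTR_TO_BTF_ID | MEM_ALLOC object
	 *          returned by bpf_obj_new */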
Continuing the previous example: void prog(void) { struct foo *f; f = bpf_obj_new(typeof(*f)); if (!f) return; bpf_obj_drop(f); } Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-15-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 11 +++++ kernel/bpf/verifier.c | 66 +++++++++++++++++++++----- tools/testing/selftests/bpf/bpf_experimental.h | 13 +++++ 3 files changed, 79 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c4f1c22cc44c..71d803ca0c1d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1770,6 +1770,16 @@ void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } +void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + void *p = p__alloc; + + if (meta) + bpf_obj_free_fields(meta->record, p); + bpf_mem_free(&bpf_global_ma, p); +} + __diag_pop(); BTF_SET8_START(generic_btf_ids) @@ -1777,6 +1787,7 @@ BTF_SET8_START(generic_btf_ids) BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) BTF_SET8_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 804f3bca6c08..1fbb0b51c429 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7879,6 +7879,10 @@ struct bpf_kfunc_call_arg_meta { u64 value; bool found; } arg_constant; + struct { + struct btf *btf; + u32 btf_id; + } arg_obj_drop; }; static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) @@ -7957,6 +7961,11 @@ static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *a return __kfunc_param_match_suffix(btf, arg, "__ign"); } +static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__alloc"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -8051,6 +8060,7 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_CTX, + KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ KF_ARG_PTR_TO_DYNPTR, KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ @@ -8058,6 +8068,20 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ }; +enum special_kfunc_type { + KF_bpf_obj_new_impl, + KF_bpf_obj_drop_impl, +}; + +BTF_SET_START(special_kfunc_set) +BTF_ID(func, bpf_obj_new_impl) +BTF_ID(func, bpf_obj_drop_impl) +BTF_SET_END(special_kfunc_set) + +BTF_ID_LIST(special_kfunc_list) +BTF_ID(func, bpf_obj_new_impl) +BTF_ID(func, bpf_obj_drop_impl) + static enum kfunc_ptr_arg_type get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, @@ -8078,6 +8102,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) return KF_ARG_PTR_TO_CTX; + if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_ALLOC_BTF_ID; + if (is_kfunc_arg_kptr_get(meta, argno)) { if (!btf_type_is_ptr(ref_t)) { verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n"); @@ -8294,6 +8321,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return 
kf_arg_type; switch (kf_arg_type) { + case KF_ARG_PTR_TO_ALLOC_BTF_ID: case KF_ARG_PTR_TO_BTF_ID: if (!is_kfunc_trusted_args(meta)) break; @@ -8330,6 +8358,21 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } break; + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + if (meta->btf == btf_vmlinux && + meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + meta->arg_obj_drop.btf = reg->btf; + meta->arg_obj_drop.btf_id = reg->btf_id; + } + break; case KF_ARG_PTR_TO_KPTR: if (reg->type != PTR_TO_MAP_VALUE) { verbose(env, "arg#0 expected pointer to map value\n"); @@ -8400,17 +8443,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return 0; } -enum special_kfunc_type { - KF_bpf_obj_new_impl, -}; - -BTF_SET_START(special_kfunc_set) -BTF_ID(func, bpf_obj_new_impl) -BTF_SET_END(special_kfunc_set) - -BTF_ID_LIST(special_kfunc_list) -BTF_ID(func, bpf_obj_new_impl) - static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -8532,6 +8564,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; env->insn_aux_data[insn_idx].kptr_struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + env->insn_aux_data[insn_idx].kptr_struct_meta = + btf_find_struct_meta(meta.arg_obj_drop.btf, + meta.arg_obj_drop.btf_id); } else { verbose(env, "kernel function %s unhandled dynamic return type\n", meta.func_name); @@ -14768,6 +14804,14 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_buf[2] = addr[1]; insn_buf[3] = *insn; *cnt = 4; + } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; + + insn_buf[0] = addr[0]; + insn_buf[1] = addr[1]; + insn_buf[2] = *insn; + *cnt = 3; } return 0; } diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index aeb6a7fcb7c4..8473395a11af 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -22,4 +22,17 @@ extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym; /* Convenience macro to wrap over bpf_obj_new_impl */ #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) +/* Description + * Free an allocated object. All fields of the object that require + * destruction will be destructed before the storage is freed. + * + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * Void. 
+ */
+extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
+
 #endif
-- cgit v1.2.3-58-ga151

From 8cab76ec634995e59a8b6346bf8b835ab7fad3a3 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi
Date: Fri, 18 Nov 2022 07:26:06 +0530
Subject: bpf: Introduce single ownership BPF linked list API

Add a linked list API for use in BPF programs, which expects protection
from a bpf_spin_lock in the same allocation as the bpf_list_head. For
now, only one bpf_spin_lock can be present, hence it is assumed to be
the one protecting the bpf_list_head.

The following functions are added to kick things off:

	// Add node to beginning of list
	void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node);

	// Add node to end of list
	void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node);

	// Remove node at beginning of list and return it
	struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head);

	// Remove node at end of list and return it
	struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head);

The lock protecting the bpf_list_head needs to be taken for all
operations. The verifier ensures that the lock that needs to be taken is
always held, and only the correct lock is taken for these operations.
These checks are made statically by relying on the reg->id preserved for
registers pointing into regions having both bpf_spin_lock and the
objects protected by it. The comment over check_reg_allocation_locked in
this change describes the logic in detail.

Note that bpf_list_push_front and bpf_list_push_back are meant to
consume the object containing the node argument; however, that mechanism
intentionally does not release the ref_obj_id until bpf_spin_unlock is
called. In this commit nothing is done about it, but the next commit
introduces the logic to handle this case, so it has been left as is for
now.

bpf_list_pop_front and bpf_list_pop_back delete the first or last item
of the list respectively, and return a pointer to the element at the
list_node offset. The user can then use a container_of-style macro to
get the actual entry type. The verifier however statically knows the
actual type, so the safety properties are still preserved.

With these additions, programs can now manage their own linked lists and
store their objects in them.

Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20221118015614.2013203-17-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/helpers.c                           |  55 ++++-
 kernel/bpf/verifier.c                          | 275 ++++++++++++++++++++++++-
 tools/testing/selftests/bpf/bpf_experimental.h |  28 +++
 3 files changed, 349 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 71d803ca0c1d..212e791d7452 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1780,6 +1780,50 @@ void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
 	bpf_mem_free(&bpf_global_ma, p);
 }
 
+static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
+{
+	struct list_head *n = (void *)node, *h = (void *)head;
+
+	if (unlikely(!h->next))
+		INIT_LIST_HEAD(h);
+	if (unlikely(!n->next))
+		INIT_LIST_HEAD(n);
+	tail ?
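+	/*
+	 * Zeroed memory is how a never-used bpf_list_head/bpf_list_node
+	 * presents itself (next == NULL); the two unlikely() checks above
+	 * initialize such heads and nodes lazily on first use.
+	 */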
list_add_tail(n, h) : list_add(n, h); +} + +void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) +{ + return __bpf_list_add(node, head, false); +} + +void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) +{ + return __bpf_list_add(node, head, true); +} + +static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) +{ + struct list_head *n, *h = (void *)head; + + if (unlikely(!h->next)) + INIT_LIST_HEAD(h); + if (list_empty(h)) + return NULL; + n = tail ? h->prev : h->next; + list_del_init(n); + return (struct bpf_list_node *)n; +} + +struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) +{ + return __bpf_list_del(head, false); +} + +struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) +{ + return __bpf_list_del(head, true); +} + __diag_pop(); BTF_SET8_START(generic_btf_ids) @@ -1788,6 +1832,10 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_list_push_front) +BTF_ID_FLAGS(func, bpf_list_push_back) +BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_SET8_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { @@ -1797,7 +1845,12 @@ static const struct btf_kfunc_id_set generic_kfunc_set = { static int __init kfunc_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); + if (ret) + return ret; + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); } late_initcall(kfunc_init); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a339a39d895c..1364df74129e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7883,6 +7883,9 @@ struct bpf_kfunc_call_arg_meta { struct btf *btf; u32 btf_id; } arg_obj_drop; + struct { + struct btf_field *field; + } arg_list_head; }; static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) @@ -7987,13 +7990,17 @@ static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, enum { KF_ARG_DYNPTR_ID, + KF_ARG_LIST_HEAD_ID, + KF_ARG_LIST_NODE_ID, }; BTF_ID_LIST(kf_arg_btf_ids) BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_list_head) +BTF_ID(struct, bpf_list_node) -static bool is_kfunc_arg_dynptr(const struct btf *btf, - const struct btf_param *arg) +static bool __is_kfunc_ptr_arg_type(const struct btf *btf, + const struct btf_param *arg, int type) { const struct btf_type *t; u32 res_id; @@ -8006,7 +8013,22 @@ static bool is_kfunc_arg_dynptr(const struct btf *btf, t = btf_type_skip_modifiers(btf, t->type, &res_id); if (!t) return false; - return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[KF_ARG_DYNPTR_ID]); + return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]); +} + +static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID); +} + +static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID); +} + +static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID); } /* Returns true if struct is composed of scalars, 4 
levels of nesting allowed */
@@ -8063,6 +8085,8 @@ enum kfunc_ptr_arg_type {
 	KF_ARG_PTR_TO_ALLOC_BTF_ID,  /* Allocated object */
 	KF_ARG_PTR_TO_KPTR,	     /* PTR_TO_KPTR but type specific */
 	KF_ARG_PTR_TO_DYNPTR,
+	KF_ARG_PTR_TO_LIST_HEAD,
+	KF_ARG_PTR_TO_LIST_NODE,
 	KF_ARG_PTR_TO_BTF_ID,	     /* Also covers reg2btf_ids conversions */
 	KF_ARG_PTR_TO_MEM,
 	KF_ARG_PTR_TO_MEM_SIZE,	     /* Size derived from next argument, skip it */
@@ -8071,16 +8095,28 @@ enum kfunc_ptr_arg_type {
 
 enum special_kfunc_type {
 	KF_bpf_obj_new_impl,
 	KF_bpf_obj_drop_impl,
+	KF_bpf_list_push_front,
+	KF_bpf_list_push_back,
+	KF_bpf_list_pop_front,
+	KF_bpf_list_pop_back,
 };
 
 BTF_SET_START(special_kfunc_set)
 BTF_ID(func, bpf_obj_new_impl)
 BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_list_push_front)
+BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_list_pop_front)
+BTF_ID(func, bpf_list_pop_back)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
 BTF_ID(func, bpf_obj_new_impl)
 BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_list_push_front)
+BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_list_pop_front)
+BTF_ID(func, bpf_list_pop_back)
 
 static enum kfunc_ptr_arg_type
 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
@@ -8123,6 +8159,12 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
 		return KF_ARG_PTR_TO_DYNPTR;
 
+	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_LIST_HEAD;
+
+	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_LIST_NODE;
+
 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
 		if (!btf_type_is_struct(ref_t)) {
			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -8218,6 +8260,182 @@ static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* Implementation details:
+ *
+ * Each register points to some region of memory, which we define as an
+ * allocation. Each allocation may embed a bpf_spin_lock which protects any
+ * special BPF objects (bpf_list_head, bpf_rb_root, etc.) that are part of
+ * the same allocation. The lock and the data it protects are colocated in
+ * the same memory region.
+ *
+ * Hence, every time a register holds a pointer value pointing to such an
+ * allocation, the verifier preserves a unique reg->id for it.
+ *
+ * The verifier remembers the lock 'ptr' and the lock 'id' whenever
+ * bpf_spin_lock is called.
+ *
+ * To enable this, lock state in the verifier captures two values:
+ *	active_lock.ptr = Register's type specific pointer
+ *	active_lock.id  = A unique ID for each register pointer value
+ *
+ * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
+ * supported register types.
+ *
+ * The active_lock.ptr in case of map values is the reg->map_ptr, and in case
+ * of allocated objects it is the reg->btf pointer.
+ *
+ * The active_lock.id is non-unique for maps supporting direct_value_addr, as
+ * we can establish the provenance of the map value statically for each
+ * distinct lookup into such maps. They always contain a single map value,
+ * hence a unique ID for each pseudo load would pessimize the algorithm and
+ * reject valid programs.
+ *
+ * So, in the case of global variables, which use array maps with
+ * max_entries = 1, their active_lock.ptr becomes the map_ptr and id = 0
+ * (since they all point into the same map value, as described above).
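+ *
+ * For example (an illustrative sketch, NULL checks omitted): two lookups
+ * of the same array map element each receive a fresh reg->id, so this
+ * pairing is rejected with "bpf_spin_unlock of different lock":
+ *
+ *	f1 = bpf_map_lookup_elem(&m, &key);
+ *	f2 = bpf_map_lookup_elem(&m, &key);
+ *	bpf_spin_lock(&f1->lock);
+ *	bpf_spin_unlock(&f2->lock);
+ *
+ * whereas two pseudo loads of the same global lock both carry id = 0 and
+ * therefore pair successfully.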
+ * + * In case of inner map lookups, the inner map pointer has same map_ptr as the + * outer map pointer (in verifier context), but each lookup into an inner map + * assigns a fresh reg->id to the lookup, so while lookups into distinct inner + * maps from the same outer map share the same map_ptr as active_lock.ptr, they + * will get different reg->id assigned to each lookup, hence different + * active_lock.id. + * + * In case of allocated objects, active_lock.ptr is the reg->btf, and the + * reg->id is a unique ID preserved after the NULL pointer check on the pointer + * returned from bpf_obj_new. Each allocation receives a new reg->id. + */ +static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + void *ptr; + u32 id; + + switch ((int)reg->type) { + case PTR_TO_MAP_VALUE: + ptr = reg->map_ptr; + break; + case PTR_TO_BTF_ID | MEM_ALLOC: + ptr = reg->btf; + break; + default: + verbose(env, "verifier internal error: unknown reg type for lock check\n"); + return -EFAULT; + } + id = reg->id; + + if (!env->cur_state->active_lock.ptr) + return -EINVAL; + if (env->cur_state->active_lock.ptr != ptr || + env->cur_state->active_lock.id != id) { + verbose(env, "held lock and object are not in the same allocation\n"); + return -EINVAL; + } + return 0; +} + +static bool is_bpf_list_api_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_list_push_front] || + btf_id == special_kfunc_list[KF_bpf_list_push_back] || + btf_id == special_kfunc_list[KF_bpf_list_pop_front] || + btf_id == special_kfunc_list[KF_bpf_list_pop_back]; +} + +static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct btf_field *field; + struct btf_record *rec; + u32 list_head_off; + + if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) { + verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n"); + return -EFAULT; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n", + regno); + return -EINVAL; + } + + rec = reg_btf_record(reg); + list_head_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD); + if (!field) { + verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off); + return -EINVAL; + } + + /* All functions require bpf_list_head to be protected using a bpf_spin_lock */ + if (check_reg_allocation_locked(env, reg)) { + verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n", + rec->spin_lock_off); + return -EINVAL; + } + + if (meta->arg_list_head.field) { + verbose(env, "verifier internal error: repeating bpf_list_head arg\n"); + return -EFAULT; + } + meta->arg_list_head.field = field; + return 0; +} + +static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + const struct btf_type *et, *t; + struct btf_field *field; + struct btf_record *rec; + u32 list_node_off; + + if (meta->btf != btf_vmlinux || + (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] && + meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) { + verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n"); + return -EFAULT; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. 
bpf_list_node has to be at the constant offset\n", + regno); + return -EINVAL; + } + + rec = reg_btf_record(reg); + list_node_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, list_node_off, BPF_LIST_NODE); + if (!field || field->offset != list_node_off) { + verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off); + return -EINVAL; + } + + field = meta->arg_list_head.field; + + et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id); + t = btf_type_by_id(reg->btf, reg->btf_id); + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf, + field->list_head.value_btf_id, true)) { + verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d " + "in struct %s, but arg is at offset=%d in struct %s\n", + field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off), + list_node_off, btf_name_by_offset(reg->btf, t->name_off)); + return -EINVAL; + } + + if (list_node_off != field->list_head.node_offset) { + verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n", + list_node_off, field->list_head.node_offset, + btf_name_by_offset(field->list_head.btf, et->name_off)); + return -EINVAL; + } + return 0; +} + static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta) { const char *func_name = meta->func_name, *ref_tname; @@ -8336,6 +8554,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_KPTR: case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_LIST_HEAD: + case KF_ARG_PTR_TO_LIST_NODE: case KF_ARG_PTR_TO_MEM: case KF_ARG_PTR_TO_MEM_SIZE: /* Trusted by default */ @@ -8400,6 +8620,33 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } break; + case KF_ARG_PTR_TO_LIST_HEAD: + if (reg->type != PTR_TO_MAP_VALUE && + reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); + return -EINVAL; + } + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_LIST_NODE: + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_BTF_ID: /* Only base_type is checked, further checks are done here */ if (reg->type != PTR_TO_BTF_ID && @@ -8568,6 +8815,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, env->insn_aux_data[insn_idx].kptr_struct_meta = btf_find_struct_meta(meta.arg_obj_drop.btf, meta.arg_obj_drop.btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || + meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { + struct btf_field *field = meta.arg_list_head.field; + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = field->list_head.btf; + regs[BPF_REG_0].btf_id = field->list_head.value_btf_id; + regs[BPF_REG_0].off = field->list_head.node_offset; } else { verbose(env, "kernel function %s unhandled dynamic 
return type\n", meta.func_name); @@ -13264,11 +13520,14 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_lock.ptr && - (insn->src_reg == BPF_PSEUDO_CALL || - insn->imm != BPF_FUNC_spin_unlock)) { - verbose(env, "function calls are not allowed while holding a lock\n"); - return -EINVAL; + if (env->cur_state->active_lock.ptr) { + if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || + (insn->src_reg == BPF_PSEUDO_CALL) || + (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && + (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) { + verbose(env, "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 8473395a11af..d6b143275e82 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -35,4 +35,32 @@ extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; /* Convenience macro to wrap over bpf_obj_drop_impl */ #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) +/* Description + * Add a new entry to the beginning of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Add a new entry to the end of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Remove the entry at the beginning of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; + +/* Description + * Remove the entry at the end of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; + #endif -- cgit v1.2.3-58-ga151 From 64069c72b4b8e44f6876249cc8f2e2ee4d209a93 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:09 +0530 Subject: selftests/bpf: Add __contains macro to bpf_experimental.h Add user facing __contains macro which provides a convenient wrapper over the verbose kernel specific BTF declaration tag required to annotate BPF list head structs in user types. Acked-by: Dave Marchevsky Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-20-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_experimental.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index d6b143275e82..424f7bbbfe9b 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -6,6 +6,8 @@ #include #include +#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node))) + /* Description * Allocates an object of the type represented by 'local_type_id' in * program BTF. 
User may use the bpf_core_type_id_local macro to pass the -- cgit v1.2.3-58-ga151 From d85aedac4dc43deaba7aabc78198d0600bb84887 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:10 +0530 Subject: selftests/bpf: Update spinlock selftest Make updates in preparation for adding more test cases to this selftest: - Convert from CHECK_ to ASSERT macros. - Use BPF skeleton - Fix typo sping -> spin - Rename spinlock.c -> spin_lock.c Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-21-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 49 ++++++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/spinlock.c | 45 -------------------- tools/testing/selftests/bpf/progs/test_spin_lock.c | 4 +- 3 files changed, 51 insertions(+), 47 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/spin_lock.c delete mode 100644 tools/testing/selftests/bpf/prog_tests/spinlock.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c new file mode 100644 index 000000000000..fab061e9d77c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "test_spin_lock.skel.h" + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + pthread_exit(arg); +} + +void test_spinlock(void) +{ + struct test_spin_lock *skel; + pthread_t thread_id[4]; + int prog_fd, i; + void *ret; + + skel = test_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load")) + return; + prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test); + for (i = 0; i < 4; i++) { + int err; + + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end; + } + + for (i = 0; i < 4; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end; + } +end: + test_spin_lock__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c deleted file mode 100644 index 15eb1372d771..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -static void *spin_lock_thread(void *arg) -{ - int err, prog_fd = *(u32 *) arg; - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .repeat = 10000, - ); - - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); - pthread_exit(arg); -} - -void test_spinlock(void) -{ - const char *file = "./test_spin_lock.bpf.o"; - pthread_t thread_id[4]; - struct bpf_object *obj = NULL; - int prog_fd; - int err = 0, i; - void *ret; - - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (CHECK_FAIL(err)) { - printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); - goto close_prog; - } - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, - 
&spin_lock_thread, &prog_fd)))
-			goto close_prog;
-
-	for (i = 0; i < 4; i++)
-		if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
-			       ret != (void *)&prog_fd))
-			goto close_prog;
-close_prog:
-	bpf_object__close(obj);
-}
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 7e88309d3229..5bd10409285b 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -45,8 +45,8 @@ struct {
 
 #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
 
-SEC("tc")
-int bpf_sping_lock_test(struct __sk_buff *skb)
+SEC("cgroup_skb/ingress")
+int bpf_spin_lock_test(struct __sk_buff *skb)
 {
 	volatile int credit = 0, max_credit = 100, pkt_len = 64;
 	struct hmap_elem zero = {}, *val;
-- cgit v1.2.3-58-ga151

From c48748aea4f806587813f02219ca0b4910646c5e Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi
Date: Fri, 18 Nov 2022 07:26:11 +0530
Subject: selftests/bpf: Add failure test cases for spin lock pairing

First, ensure that whenever a bpf_spin_lock is present in an allocation,
the reg->id is preserved. This won't be true for global variables,
however, since they have a single map value per map, hence the verifier
hardcodes it to 0 (so that multiple pseudo ldimm64 insns can yield the
same lock object per map at a given offset).

Next, add test cases for all possible combinations (kptr, global, map
value, inner map value). Since we lifted the restriction on locking in
inner maps, also add test cases for them. Currently, each lookup into an
inner map gets a fresh reg->id, so even if the reg->map_ptr is the same,
they will be treated as separate allocations and the incorrect unlock
pairing will be rejected.

Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20221118015614.2013203-22-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/prog_tests/spin_lock.c |  89 ++++++++-
 .../selftests/bpf/progs/test_spin_lock_fail.c      | 204 +++++++++++++++++++++
 2 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_spin_lock_fail.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index fab061e9d77c..72282e92a78a 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -3,6 +3,79 @@
 #include
 
 #include "test_spin_lock.skel.h"
+#include "test_spin_lock_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+	const char *prog_name;
+	const char *err_msg;
+} spin_lock_fail_tests[] = {
+	{ "lock_id_kptr_preserve",
+	  "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) "
+	  "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+	  "R1 type=ptr_ expected=percpu_ptr_" },
+	{ "lock_id_global_zero",
+	  "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n"
+	  "R1 type=map_value expected=percpu_ptr_" },
+	{ "lock_id_mapval_preserve",
+	  "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) "
+	  "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n"
+	  "R1 type=map_value expected=percpu_ptr_" },
+	{ "lock_id_innermapval_preserve",
+	  "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) "
+	  "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n"
+	  "R1 type=map_value expected=percpu_ptr_" },
+	{ "lock_id_mismatch_kptr_kptr",
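+	  /* all lock_id_mismatch_* progs must fail with the same message: */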
"bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, +}; + +static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_spin_lock_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_spin_lock_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_spin_lock_fail__load(skel); + if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_spin_lock_fail__destroy(skel); +} static void *spin_lock_thread(void *arg) { @@ -19,7 +92,7 @@ static void *spin_lock_thread(void *arg) pthread_exit(arg); } -void test_spinlock(void) +void test_spin_lock_success(void) { struct test_spin_lock *skel; pthread_t thread_id[4]; @@ -47,3 +120,17 @@ void test_spinlock(void) end: test_spin_lock__destroy(skel); } + +void test_spin_lock(void) +{ + int i; + + test_spin_lock_success(); + + for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) { + if (!test__start_subtest(spin_lock_fail_tests[i].prog_name)) + continue; + test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name, + spin_lock_fail_tests[i].err_msg); + } +} diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c new file mode 100644 index 000000000000..86cd183ef6dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "bpf_experimental.h" + +struct foo { + struct bpf_spin_lock lock; + int data; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct foo); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + 
__uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &array_map, + }, +}; + +SEC(".data.A") struct bpf_spin_lock lockA; +SEC(".data.B") struct bpf_spin_lock lockB; + +SEC("?tc") +int lock_id_kptr_preserve(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_global_zero(void *ctx) +{ + bpf_this_cpu_ptr(&lockA); + return 0; +} + +SEC("?tc") +int lock_id_mapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + + f = bpf_map_lookup_elem(&array_map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_innermapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f = bpf_map_lookup_elem(map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +#define CHECK(test, A, B) \ + SEC("?tc") \ + int lock_id_mismatch_##test(void *ctx) \ + { \ + struct foo *f1, *f2, *v, *iv; \ + int key = 0; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &key); \ + if (!map) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &key); \ + if (!iv) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &key); \ + if (!v) \ + return 0; \ + f1 = bpf_obj_new(typeof(*f1)); \ + if (!f1) \ + return 0; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + bpf_spin_lock(A); \ + bpf_spin_unlock(B); \ + return 0; \ + } + +CHECK(kptr_kptr, &f1->lock, &f2->lock); +CHECK(kptr_global, &f1->lock, &lockA); +CHECK(kptr_mapval, &f1->lock, &v->lock); +CHECK(kptr_innermapval, &f1->lock, &iv->lock); + +CHECK(global_global, &lockA, &lockB); +CHECK(global_kptr, &lockA, &f1->lock); +CHECK(global_mapval, &lockA, &v->lock); +CHECK(global_innermapval, &lockA, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_mapval_mapval(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + + f1 = bpf_map_lookup_elem(&array_map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(&array_map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(mapval_kptr, &v->lock, &f1->lock); +CHECK(mapval_global, &v->lock, &lockB); +CHECK(mapval_innermapval, &v->lock, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval1(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval2(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(innermapval_kptr, &iv->lock, &f1->lock); +CHECK(innermapval_global, &iv->lock, &lockA); +CHECK(innermapval_mapval, &iv->lock, &v->lock); + +#undef CHECK + +char _license[] SEC("license") = "GPL"; -- cgit 
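For reference, the pattern these pairing rules accept keeps the lock and the
objects it protects in the same allocation and takes that lock around every
list operation. A minimal sketch follows (the type, section, and program
names are illustrative, not taken from these patches; it assumes the
bpf_experimental.h declarations above plus a container_of macro):

	struct elem {
		struct bpf_list_node node;
		int data;
	};

	#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
	private(A) struct bpf_spin_lock glock;
	private(A) struct bpf_list_head ghead __contains(elem, node);

	SEC("tc")
	int list_example(void *ctx)
	{
		struct bpf_list_node *n;
		struct elem *e;

		e = bpf_obj_new(typeof(*e));
		if (!e)
			return 0;
		e->data = 42;

		/* glock and ghead live in the same allocation (section A) */
		bpf_spin_lock(&glock);
		bpf_list_push_front(&ghead, &e->node);
		/* e is owned by the list now and must not be touched directly */
		n = bpf_list_pop_back(&ghead);
		bpf_spin_unlock(&glock);

		if (n)	/* pop may return NULL on an empty list */
			bpf_obj_drop(container_of(n, struct elem, node));
		return 0;
	}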
v1.2.3-58-ga151 From 300f19dcdb99b708353d9e46fd660a4765ab277d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:12 +0530 Subject: selftests/bpf: Add BPF linked list API tests Include various tests covering the success and failure cases. Also, run the success cases at runtime to verify correctness of linked list manipulation routines, in addition to ensuring successful verification. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-23-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../testing/selftests/bpf/prog_tests/linked_list.c | 255 +++++++++ tools/testing/selftests/bpf/progs/linked_list.c | 370 +++++++++++++ tools/testing/selftests/bpf/progs/linked_list.h | 56 ++ .../testing/selftests/bpf/progs/linked_list_fail.c | 581 +++++++++++++++++++++ 6 files changed, 1264 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/linked_list.c create mode 100644 tools/testing/selftests/bpf/progs/linked_list.c create mode 100644 tools/testing/selftests/bpf/progs/linked_list.h create mode 100644 tools/testing/selftests/bpf/progs/linked_list_fail.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 09416d5d2e33..affc5aebbf0f 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -38,6 +38,7 @@ kprobe_multi_test/skel_api # kprobe_multi__attach unexpect ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) +linked_list lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) lru_bug # lru_bug__attach unexpected error: -524 (errno 524) modify_return # modify_return__attach failed unexpected error: -524 (errno 524) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index be4e3d47ea3e..072243af93b0 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -33,6 +33,7 @@ ksyms_module # test_ksyms_module__open_and_load unex ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) 
libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +linked_list # JIT does not support calling kernel function (kfunc) lookup_key # JIT does not support calling kernel function (kfunc) lru_bug # prog 'printk': failed to auto-attach: -524 map_kptr # failed to open_and_load program: -524 (trampoline) diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c new file mode 100644 index 000000000000..41e588807321 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "linked_list.skel.h" +#include "linked_list_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} linked_list_fail_tests[] = { +#define TEST(test, off) \ + { #test "_missing_lock_push_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_push_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, + TEST(kptr, 32) + TEST(global, 16) + TEST(map, 0) + TEST(inner_map, 0) +#undef TEST +#define TEST(test, op) \ + { #test "_kptr_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ + { #test "_global_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ + { #test "_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \ + { #test "_inner_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, + TEST(kptr, push_front) + TEST(kptr, push_back) + TEST(kptr, pop_front) + TEST(kptr, pop_back) + TEST(global, push_front) + TEST(global, push_back) + TEST(global, pop_front) + TEST(global, pop_back) + TEST(map, push_front) + TEST(map, push_back) + TEST(map, pop_front) + TEST(map, pop_back) + TEST(inner_map, push_front) + TEST(inner_map, push_back) + TEST(inner_map, pop_front) + TEST(inner_map, pop_back) +#undef TEST + { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" }, + { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, + { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" }, + { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, + { "obj_new_acq", "Unreleased reference id=" }, + { "use_after_drop", "invalid mem access 'scalar'" }, + { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "direct_read_lock", "direct access to 
bpf_spin_lock is disallowed" }, + { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_read_head", "direct access to bpf_list_head is disallowed" }, + { "direct_write_head", "direct access to bpf_list_head is disallowed" }, + { "direct_read_node", "direct access to bpf_list_node is disallowed" }, + { "direct_write_node", "direct access to bpf_list_node is disallowed" }, + { "write_after_push_front", "only read is supported" }, + { "write_after_push_back", "only read is supported" }, + { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, + { "use_after_unlock_push_back", "invalid mem access 'scalar'" }, + { "double_push_front", "arg#1 expected pointer to allocated object" }, + { "double_push_back", "arg#1 expected pointer to allocated object" }, + { "no_node_value_type", "bpf_list_node not found at offset=0" }, + { "incorrect_value_type", + "operation on bpf_list_head expects arg#1 bpf_list_node at offset=0 in struct foo, " + "but arg is at offset=0 in struct bar" }, + { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_node_off1", "bpf_list_node not found at offset=1" }, + { "incorrect_node_off2", "arg#1 offset=40, but expected bpf_list_node at offset=0 in struct foo" }, + { "no_head_type", "bpf_list_head not found at offset=0" }, + { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, + { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, + { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, + { "pop_front_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, + { "pop_back_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, +}; + +static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct linked_list_fail *skel; + struct bpf_program *prog; + int ret; + + skel = linked_list_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = linked_list_fail__load(skel); + if (!ASSERT_ERR(ret, "linked_list_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + linked_list_fail__destroy(skel); +} + +static void clear_fields(struct bpf_map *map) +{ + char buf[24]; + int key = 0; + + memset(buf, 0xff, sizeof(buf)); + ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields"); +} + +enum { + TEST_ALL, + PUSH_POP, + PUSH_POP_MULT, + LIST_IN_LIST, +}; + +static void test_linked_list_success(int mode, bool leave_in_map) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + 
.repeat = 1, + ); + struct linked_list *skel; + int ret; + + skel = linked_list__open_and_load(); + if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load")) + return; + + if (mode == LIST_IN_LIST) + goto lil; + if (mode == PUSH_POP_MULT) + goto ppm; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts); + ASSERT_OK(ret, "map_list_push_pop"); + ASSERT_OK(opts.retval, "map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); + ASSERT_OK(ret, "global_list_push_pop"); + ASSERT_OK(opts.retval, "global_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.data_A); + + if (mode == PUSH_POP) + goto end; + +ppm: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "global_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.data_A); + + if (mode == PUSH_POP_MULT) + goto end; + +lil: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts); + ASSERT_OK(ret, "map_list_in_list"); + ASSERT_OK(opts.retval, "map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts); + ASSERT_OK(ret, "inner_map_list_in_list"); + ASSERT_OK(opts.retval, "inner_map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); + ASSERT_OK(ret, "global_list_in_list"); + ASSERT_OK(opts.retval, "global_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.data_A); +end: + linked_list__destroy(skel); +} + +void test_linked_list(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) { + if (!test__start_subtest(linked_list_fail_tests[i].prog_name)) + continue; + test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name, + linked_list_fail_tests[i].err_msg); + } + test_linked_list_success(PUSH_POP, false); + test_linked_list_success(PUSH_POP, true); + test_linked_list_success(PUSH_POP_MULT, false); + test_linked_list_success(PUSH_POP_MULT, true); + test_linked_list_success(LIST_IN_LIST, false); + test_linked_list_success(LIST_IN_LIST, true); + test_linked_list_success(TEST_ALL, false); +} diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c new file mode 100644 index 000000000000..2c7b615c6d41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ 
-0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "bpf_experimental.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#include "linked_list.h" + +static __always_inline +int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 3; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 4; + } + + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + if (leave_in_map) + return 0; + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 6; + } + + bpf_spin_lock(lock); + f->data = 13; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 7; + f = container_of(n, struct foo, node); + if (f->data != 13) { + bpf_obj_drop(f); + return 8; + } + bpf_obj_drop(f); + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 9; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 10; + } + return 0; +} + + +static __always_inline +int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f[8], *pf; + int i; + + for (i = 0; i < ARRAY_SIZE(f); i++) { + f[i] = bpf_obj_new(typeof(**f)); + if (!f[i]) + return 2; + f[i]->data = i; + bpf_spin_lock(lock); + bpf_list_push_front(head, &f[i]->node); + bpf_spin_unlock(lock); + } + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 3; + pf = container_of(n, struct foo, node); + if (pf->data != (ARRAY_SIZE(f) - i - 1)) { + bpf_obj_drop(pf); + return 4; + } + bpf_spin_lock(lock); + bpf_list_push_back(head, &pf->node); + bpf_spin_unlock(lock); + } + + if (leave_in_map) + return 0; + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + pf = container_of(n, struct foo, node); + if (pf->data != i) { + bpf_obj_drop(pf); + return 6; + } + bpf_obj_drop(pf); + } + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 7; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 8; + } + return 0; +} + +static __always_inline +int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct bar *ba[8], *b; + struct foo *f; + int i; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + for (i = 0; i < ARRAY_SIZE(ba); i++) { + b = bpf_obj_new(typeof(*b)); + if (!b) { + 
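+			/* This error path still owns f: the verifier requires
+			 * every acquired reference to be released before exit,
+			 * else it reports an unreleased reference.
+			 */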
bpf_obj_drop(f); + return 3; + } + b->data = i; + bpf_spin_lock(&f->lock); + bpf_list_push_back(&f->head, &b->node); + bpf_spin_unlock(&f->lock); + } + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + + if (leave_in_map) + return 0; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 4; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 5; + } + + for (i = 0; i < ARRAY_SIZE(ba); i++) { + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (!n) { + bpf_obj_drop(f); + return 6; + } + b = container_of(n, struct bar, node); + if (b->data != i) { + bpf_obj_drop(f); + bpf_obj_drop(b); + return 7; + } + bpf_obj_drop(b); + } + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (n) { + bpf_obj_drop(f); + bpf_obj_drop(container_of(n, struct bar, node)); + return 8; + } + bpf_obj_drop(f); + return 0; +} + +static __always_inline +int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop(lock, head, false); + if (ret) + return ret; + return list_push_pop(lock, head, true); +} + +static __always_inline +int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop_multiple(lock ,head, false); + if (ret) + return ret; + return list_push_pop_multiple(lock, head, true); +} + +static __always_inline +int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_in_list(lock, head, false); + if (ret) + return ret; + return list_in_list(lock, head, true); +} + +SEC("tc") +int map_list_push_pop(void *ctx) +{ + struct map_value *v; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop(void *ctx) +{ + struct map_value *v; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop(void *ctx) +{ + return test_list_push_pop(&glock, &ghead); +} + +SEC("tc") +int map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop_multiple(void *ctx) +{ + int ret; + + ret = list_push_pop_multiple(&glock, &ghead, false); + if (ret) + return ret; + return list_push_pop_multiple(&glock, &ghead, true); +} + +SEC("tc") +int map_list_in_list(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_in_list(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) 
+ return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int global_list_in_list(void *ctx) +{ + return test_list_in_list(&glock, &ghead); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h new file mode 100644 index 000000000000..8db80ed64db1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef LINKED_LIST_H +#define LINKED_LIST_H + +#include +#include +#include "bpf_experimental.h" + +struct bar { + struct bpf_list_node node; + int data; +}; + +struct foo { + struct bpf_list_node node; + struct bpf_list_head head __contains(bar, node); + struct bpf_spin_lock lock; + int data; + struct bpf_list_node node2; +}; + +struct map_value { + struct bpf_spin_lock lock; + int data; + struct bpf_list_head head __contains(foo, node); +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +}; + +struct array_map array_map SEC(".maps"); +struct array_map inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) + +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_list_head ghead __contains(foo, node); +private(B) struct bpf_spin_lock glock2; + +#endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c new file mode 100644 index 000000000000..1d9017240e19 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "bpf_experimental.h" + +#include "linked_list.h" + +#define INIT \ + struct map_value *v, *v2, *iv, *iv2; \ + struct foo *f, *f1, *f2; \ + struct bar *b; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \ + if (!map) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v) \ + return 0; \ + v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v2) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv) \ + return 0; \ + iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv2) \ + return 0; \ + f = bpf_obj_new(typeof(*f)); \ + if (!f) \ + return 0; \ + f1 = f; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + b = bpf_obj_new(typeof(*b)); \ + if (!b) { \ + bpf_obj_drop(f2); \ + bpf_obj_drop(f1); \ + return 0; \ + } + +#define CHECK(test, op, hexpr) \ + SEC("?tc") \ + int test##_missing_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + p(hexpr); \ + return 0; \ + } + +CHECK(kptr, push_front, &f->head); +CHECK(kptr, push_back, &f->head); +CHECK(kptr, pop_front, &f->head); +CHECK(kptr, pop_back, &f->head); + +CHECK(global, push_front, &ghead); +CHECK(global, push_back, &ghead); +CHECK(global, pop_front, &ghead); +CHECK(global, pop_back, &ghead); + +CHECK(map, push_front, &v->head); +CHECK(map, push_back, &v->head); +CHECK(map, pop_front, &v->head); +CHECK(map, pop_back, &v->head); + +CHECK(inner_map, push_front, &iv->head); +CHECK(inner_map, push_back, &iv->head); 
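+/*
+ * For readability, one instance of the CHECK() macro above hand-expands to
+ * roughly the following (INIT is the macro defined at the top of this file):
+ *
+ *	SEC("?tc")
+ *	int map_missing_lock_pop_front(void *ctx)
+ *	{
+ *		INIT;
+ *		void (*p)(void *) = (void *)&bpf_list_pop_front;
+ *		p(&v->head);
+ *		return 0;
+ *	}
+ *
+ * The cast through void (*)(void *) lets one macro invoke both the push
+ * (two-argument) and pop (one-argument) kfuncs; these progs are only meant
+ * to reach the missing-lock check, so the call itself never executes.
+ */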
+CHECK(inner_map, pop_front, &iv->head); +CHECK(inner_map, pop_back, &iv->head); + +#undef CHECK + +#define CHECK(test, op, lexpr, hexpr) \ + SEC("?tc") \ + int test##_incorrect_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + bpf_spin_lock(lexpr); \ + p(hexpr); \ + return 0; \ + } + +#define CHECK_OP(op) \ + CHECK(kptr_kptr, op, &f1->lock, &f2->head); \ + CHECK(kptr_global, op, &f1->lock, &ghead); \ + CHECK(kptr_map, op, &f1->lock, &v->head); \ + CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \ + \ + CHECK(global_global, op, &glock2, &ghead); \ + CHECK(global_kptr, op, &glock, &f1->head); \ + CHECK(global_map, op, &glock, &v->head); \ + CHECK(global_inner_map, op, &glock, &iv->head); \ + \ + CHECK(map_map, op, &v->lock, &v2->head); \ + CHECK(map_kptr, op, &v->lock, &f2->head); \ + CHECK(map_global, op, &v->lock, &ghead); \ + CHECK(map_inner_map, op, &v->lock, &iv->head); \ + \ + CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \ + CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \ + CHECK(inner_map_global, op, &iv->lock, &ghead); \ + CHECK(inner_map_map, op, &iv->lock, &v->head); + +CHECK_OP(push_front); +CHECK_OP(push_back); +CHECK_OP(pop_front); +CHECK_OP(pop_back); + +#undef CHECK +#undef CHECK_OP +#undef INIT + +SEC("?kprobe/xyz") +int map_compat_kprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?kretprobe/xyz") +int map_compat_kretprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tracepoint/xyz") +int map_compat_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?perf_event") +int map_compat_perf(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp/xyz") +int map_compat_raw_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp.w/xyz") +int map_compat_raw_tp_w(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tc") +int obj_type_id_oor(void *ctx) +{ + bpf_obj_new_impl(~0UL, NULL); + return 0; +} + +SEC("?tc") +int obj_new_no_composite(void *ctx) +{ + bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42); + return 0; +} + +SEC("?tc") +int obj_new_no_struct(void *ctx) +{ + + bpf_obj_new(union { int data; unsigned udata; }); + return 0; +} + +SEC("?tc") +int obj_drop_non_zero_off(void *ctx) +{ + void *f; + + f = bpf_obj_new(struct foo); + if (!f) + return 0; + bpf_obj_drop(f+1); + return 0; +} + +SEC("?tc") +int new_null_ret(void *ctx) +{ + return bpf_obj_new(struct foo)->data; +} + +SEC("?tc") +int obj_new_acq(void *ctx) +{ + bpf_obj_new(struct foo); + return 0; +} + +SEC("?tc") +int use_after_drop(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_obj_drop(f); + return f->data; +} + +SEC("?tc") +int ptr_walk_scalar(void *ctx) +{ + struct test1 { + struct test2 { + struct test2 *next; + } *ptr; + } *p; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_this_cpu_ptr(p->ptr); + return 0; +} + +SEC("?tc") +int direct_read_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->lock; +} + +SEC("?tc") +int direct_write_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->lock = 0; + return 0; +} + +SEC("?tc") +int direct_read_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->head; +} + +SEC("?tc") +int direct_write_head(void *ctx) +{ + struct foo *f; + + 
f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->head = 0; + return 0; +} + +SEC("?tc") +int direct_read_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->node; +} + +SEC("?tc") +int direct_write_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->node = 0; + return 0; +} + +static __always_inline +int write_after_op(void (*push_op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + push_op(&ghead, &f->node); + f->data = 42; + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int write_after_push_front(void *ctx) +{ + return write_after_op((void *)bpf_list_push_front); +} + +SEC("?tc") +int write_after_push_back(void *ctx) +{ + return write_after_op((void *)bpf_list_push_back); +} + +static __always_inline +int use_after_unlock(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + f->data = 42; + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return f->data; +} + +SEC("?tc") +int use_after_unlock_push_front(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_front); +} + +SEC("?tc") +int use_after_unlock_push_back(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_back); +} + +static __always_inline +int list_double_add(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + op(&ghead, &f->node); + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int double_push_front(void *ctx) +{ + return list_double_add((void *)bpf_list_push_front); +} + +SEC("?tc") +int double_push_back(void *ctx) +{ + return list_double_add((void *)bpf_list_push_back); +} + +SEC("?tc") +int no_node_value_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(struct { int data; }); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, p); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_value_type(void *ctx) +{ + struct bar *b; + + b = bpf_obj_new(typeof(*b)); + if (!b) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &b->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_var_off(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + ctx->protocol); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off1(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + 1); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off2(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &f->node2); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int no_head_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(typeof(struct { int data; })); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(p, NULL); + bpf_spin_lock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off1(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void 
*)&ghead + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off2(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off1(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 0; + } + + bpf_spin_lock(&f->lock); + bpf_list_push_front((void *)&f->head + 1, &b->node); + bpf_spin_unlock(&f->lock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off2(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + 1, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +static __always_inline +int pop_ptr_off(void *(*op)(void *head)) +{ + struct { + struct bpf_list_head head __contains(foo, node2); + struct bpf_spin_lock lock; + } *p; + struct bpf_list_node *n; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_spin_lock(&p->lock); + n = op(&p->head); + bpf_spin_unlock(&p->lock); + + bpf_this_cpu_ptr(n); + return 0; +} + +SEC("?tc") +int pop_front_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_front); +} + +SEC("?tc") +int pop_back_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_back); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-58-ga151 From dc2df7bf4c8a24a55ef02ef45dd3e49abc105f76 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:13 +0530 Subject: selftests/bpf: Add BTF sanity tests Preparing the metadata for bpf_list_head involves a complicated parsing step and type resolution for the contained value. Ensure that corner cases are tested against and invalid specifications in source are duly rejected. Also include tests for incorrect ownership relationships in the BTF. 
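For orientation, the ownership encoding being probed here is the same one
BPF program source produces with the __contains() macro from
bpf_experimental.h; a minimal sketch of a valid relationship (type and
field names here are illustrative, not part of this series):

  struct elem {
          struct bpf_list_node node;      /* linked into the owner's list */
          int data;
  };

  struct owner {
          struct bpf_spin_lock lock;      /* protects head */
          struct bpf_list_head head __contains(elem, node);
  };

__contains(elem, node) is recorded in BTF as a "contains:elem:node" decl
tag on the owning struct; the tests added below emit such tags directly
with btf__add_decl_tag() so that malformed variants (bad offsets, a
missing bpf_list_node, ownership cycles) can be constructed and fed to
btf__load_into_kernel().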
Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20221118015614.2013203-24-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../testing/selftests/bpf/prog_tests/linked_list.c | 485 +++++++++++++++++++++
 1 file changed, 485 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 41e588807321..dd73d0a62c6e 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -1,4 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+#include <test_btf.h>
+#include <linux/btf.h>
 #include <test_progs.h>
 #include <network_helpers.h>
@@ -235,6 +238,487 @@ end:
 	linked_list__destroy(skel);
 }
 
+#define SPIN_LOCK 2
+#define LIST_HEAD 3
+#define LIST_NODE 4
+
+static struct btf *init_btf(void)
+{
+	int id, lid, hid, nid;
+	struct btf *btf;
+
+	btf = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf, "btf__new_empty"))
+		return NULL;
+	id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+	if (!ASSERT_EQ(id, 1, "btf__add_int"))
+		goto end;
+	lid = btf__add_struct(btf, "bpf_spin_lock", 4);
+	if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock"))
+		goto end;
+	hid = btf__add_struct(btf, "bpf_list_head", 16);
+	if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head"))
+		goto end;
+	nid = btf__add_struct(btf, "bpf_list_node", 16);
+	if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node"))
+		goto end;
+	return btf;
+end:
+	btf__free(btf);
+	return NULL;
+}
+
+static void test_btf(void)
+{
+	struct btf *btf = NULL;
+	int id, err;
+
+	while (test__start_subtest("btf: too many locks")) {
+		btf = init_btf();
+		if (!ASSERT_OK_PTR(btf, "init_btf"))
+			break;
+		id = btf__add_struct(btf, "foo", 24);
+		if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+			break;
+		err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::a"))
+			break;
+		err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::b"))
+			break;
+		err = btf__add_field(btf, "c", LIST_HEAD, 64, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::c"))
+			break;
+
+		err = btf__load_into_kernel(btf);
+		ASSERT_EQ(err, -E2BIG, "check btf");
+		btf__free(btf);
+		break;
+	}
+
+	while (test__start_subtest("btf: missing lock")) {
+		btf = init_btf();
+		if (!ASSERT_OK_PTR(btf, "init_btf"))
+			break;
+		id = btf__add_struct(btf, "foo", 16);
+		if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+			break;
+		err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::a"))
+			break;
+		id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0);
+		if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a"))
+			break;
+		id = btf__add_struct(btf, "baz", 16);
+		if (!ASSERT_EQ(id, 7, "btf__add_struct baz"))
+			break;
+		err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field baz::a"))
+			break;
+
+		err = btf__load_into_kernel(btf);
+		ASSERT_EQ(err, -EINVAL, "check btf");
+		btf__free(btf);
+		break;
+	}
+
+	while (test__start_subtest("btf: bad offset")) {
+		btf = init_btf();
+		if (!ASSERT_OK_PTR(btf, "init_btf"))
+			break;
+		id = btf__add_struct(btf, "foo", 36);
+		if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+			break;
+		err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::a"))
+			break;
+		err = btf__add_field(btf, "b", LIST_NODE, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::b"))
+			break;
+		err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::c"))
+			break;
+		id
= btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EEXIST, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing contains:")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing struct")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing node")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c")) + break; + + err = btf__load_into_kernel(btf); + btf__free(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + break; + } + + while (test__start_subtest("btf: node incorrect type")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 4); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: multiple bpf_list_node with name b")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 52); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 256, 0); + if (!ASSERT_OK(err, 
"btf__add_field foo::c")) + break; + err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0); + if (!ASSERT_OK(err, "btf__add_field foo::d")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned AA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned ABA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = 
btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar:a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar:b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar:c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + 
err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag")) + break; + id = btf__add_struct(btf, "baz", 36); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0); + if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a")) + break; + id = btf__add_struct(btf, "bam", 16); + if (!ASSERT_EQ(id, 11, "btf__add_struct bam")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bam::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } +} + void test_linked_list(void) { int i; @@ -245,6 +729,7 @@ void test_linked_list(void) test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name, linked_list_fail_tests[i].err_msg); } + test_btf(); test_linked_list_success(PUSH_POP, false); test_linked_list_success(PUSH_POP, true); test_linked_list_success(PUSH_POP_MULT, false); -- cgit v1.2.3-58-ga151 From 0a2f85a1be4328d29aefa54684d10c23a3298fef Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:14 +0530 Subject: selftests/bpf: Temporarily disable linked list tests The latest clang nightly as of writing crashes with the given test case for BPF linked lists wherever global glock, ghead, glock2 are used, hence comment out the parts that cause the crash, and prepare this commit so that it can be reverted when the fix has been made. More context in [0]. 
[0]: https://lore.kernel.org/bpf/d56223f9-483e-fbc1-4564-44c0858a1e3e@meta.com Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-25-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/linked_list.c | 21 ++++++++++++++------- tools/testing/selftests/bpf/progs/linked_list.c | 11 ++++++++++- tools/testing/selftests/bpf/progs/linked_list.h | 2 ++ .../testing/selftests/bpf/progs/linked_list_fail.c | 16 ++++++++-------- 4 files changed, 34 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index dd73d0a62c6e..6170d36fe5fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -24,7 +24,9 @@ static struct { { #test "_missing_lock_pop_back", \ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, TEST(kptr, 32) +/* FIXME TEST(global, 16) +*/ TEST(map, 0) TEST(inner_map, 0) #undef TEST @@ -32,9 +34,6 @@ static struct { { #test "_kptr_incorrect_lock_" #op, \ "held lock and object are not in the same allocation\n" \ "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ - { #test "_global_incorrect_lock_" #op, \ - "held lock and object are not in the same allocation\n" \ - "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ { #test "_map_incorrect_lock_" #op, \ "held lock and object are not in the same allocation\n" \ "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \ @@ -45,10 +44,6 @@ static struct { TEST(kptr, push_back) TEST(kptr, pop_front) TEST(kptr, pop_back) - TEST(global, push_front) - TEST(global, push_back) - TEST(global, pop_front) - TEST(global, pop_back) TEST(map, push_front) TEST(map, push_back) TEST(map, pop_front) @@ -58,12 +53,14 @@ static struct { TEST(inner_map, pop_front) TEST(inner_map, pop_back) #undef TEST +/* FIXME { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" }, +*/ { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, @@ -78,6 +75,7 @@ static struct { { "direct_write_head", "direct access to bpf_list_head is disallowed" }, { "direct_read_node", "direct access to bpf_list_node is disallowed" }, { "direct_write_node", "direct access to bpf_list_node is disallowed" }, +/* FIXME { "write_after_push_front", "only read is supported" }, { "write_after_push_back", "only read is supported" }, { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, @@ -94,8 +92,11 @@ static struct { { "no_head_type", "bpf_list_head not found at offset=0" }, { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, +*/ { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, +/* FIXME { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, +*/ { "pop_front_off", "15: (bf) r1 = r6 ; 
R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" @@ -188,8 +189,10 @@ static void test_linked_list_success(int mode, bool leave_in_map) ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); ASSERT_OK(ret, "global_list_push_pop"); ASSERT_OK(opts.retval, "global_list_push_pop retval"); + /* FIXME: if (!leave_in_map) clear_fields(skel->maps.data_A); + */ if (mode == PUSH_POP) goto end; @@ -210,8 +213,10 @@ ppm: ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); ASSERT_OK(ret, "global_list_push_pop_multiple"); ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); + /* FIXME: if (!leave_in_map) clear_fields(skel->maps.data_A); + */ if (mode == PUSH_POP_MULT) goto end; @@ -232,8 +237,10 @@ lil: ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); ASSERT_OK(ret, "global_list_in_list"); ASSERT_OK(opts.retval, "global_list_in_list retval"); + /* FIXME: if (!leave_in_map) clear_fields(skel->maps.data_A); + */ end: linked_list__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 2c7b615c6d41..a99103c86e48 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -291,7 +291,10 @@ int inner_map_list_push_pop(void *ctx) SEC("tc") int global_list_push_pop(void *ctx) { - return test_list_push_pop(&glock, &ghead); + /* FIXME: + * return test_list_push_pop(&glock, &ghead); + */ + return 0; } SEC("tc") @@ -327,10 +330,13 @@ int global_list_push_pop_multiple(void *ctx) { int ret; + /* FIXME: ret = list_push_pop_multiple(&glock, &ghead, false); if (ret) return ret; return list_push_pop_multiple(&glock, &ghead, true); + */ + return 0; } SEC("tc") @@ -364,7 +370,10 @@ int inner_map_list_in_list(void *ctx) SEC("tc") int global_list_in_list(void *ctx) { + /* FIXME return test_list_in_list(&glock, &ghead); + */ + return 0; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h index 8db80ed64db1..93157efc2d04 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.h +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -47,10 +47,12 @@ struct { }, }; +/* FIXME #define private(name) SEC(".data." 
#name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_list_head ghead __contains(foo, node); private(B) struct bpf_spin_lock glock2; +*/ #endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 1d9017240e19..1b7ed1d3a9bb 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -59,10 +59,12 @@ CHECK(kptr, push_back, &f->head); CHECK(kptr, pop_front, &f->head); CHECK(kptr, pop_back, &f->head); +/* FIXME CHECK(global, push_front, &ghead); CHECK(global, push_back, &ghead); CHECK(global, pop_front, &ghead); CHECK(global, pop_back, &ghead); +*/ CHECK(map, push_front, &v->head); CHECK(map, push_back, &v->head); @@ -89,23 +91,15 @@ CHECK(inner_map, pop_back, &iv->head); #define CHECK_OP(op) \ CHECK(kptr_kptr, op, &f1->lock, &f2->head); \ - CHECK(kptr_global, op, &f1->lock, &ghead); \ CHECK(kptr_map, op, &f1->lock, &v->head); \ CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \ \ - CHECK(global_global, op, &glock2, &ghead); \ - CHECK(global_kptr, op, &glock, &f1->head); \ - CHECK(global_map, op, &glock, &v->head); \ - CHECK(global_inner_map, op, &glock, &iv->head); \ - \ CHECK(map_map, op, &v->lock, &v2->head); \ CHECK(map_kptr, op, &v->lock, &f2->head); \ - CHECK(map_global, op, &v->lock, &ghead); \ CHECK(map_inner_map, op, &v->lock, &iv->head); \ \ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \ CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \ - CHECK(inner_map_global, op, &iv->lock, &ghead); \ CHECK(inner_map_map, op, &iv->lock, &v->head); CHECK_OP(push_front); @@ -117,6 +111,7 @@ CHECK_OP(pop_back); #undef CHECK_OP #undef INIT +/* FIXME SEC("?kprobe/xyz") int map_compat_kprobe(void *ctx) { @@ -158,6 +153,7 @@ int map_compat_raw_tp_w(void *ctx) bpf_list_push_front(&ghead, NULL); return 0; } +*/ SEC("?tc") int obj_type_id_oor(void *ctx) @@ -303,6 +299,7 @@ int direct_write_node(void *ctx) return 0; } +/* FIXME static __always_inline int write_after_op(void (*push_op)(void *head, void *node)) { @@ -506,6 +503,7 @@ int incorrect_head_var_off2(struct __sk_buff *ctx) return 0; } +*/ SEC("?tc") int incorrect_head_off1(void *ctx) @@ -529,6 +527,7 @@ int incorrect_head_off1(void *ctx) return 0; } +/* FIXME SEC("?tc") int incorrect_head_off2(void *ctx) { @@ -545,6 +544,7 @@ int incorrect_head_off2(void *ctx) return 0; } +*/ static __always_inline int pop_ptr_off(void *(*op)(void *head)) -- cgit v1.2.3-58-ga151 From a61bd7b9fef34484a3fe144a62f4ec78cc42e20e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Nov 2022 15:01:22 -0500 Subject: selftests: add a selftest for sctp vrf This patch adds 12 small test cases: 01-04 test for the sysctl net.sctp.l3mdev_accept. 05-10 test for only binding to a right l3mdev device, the connection can be created. 11-12 test for two socks binding to different l3mdev devices at the same time, each of them can process the packets from the corresponding peer. The tests run for both IPv4 and IPv6 SCTP. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller
---
 tools/testing/selftests/net/Makefile     |   2 +
 tools/testing/selftests/net/sctp_hello.c | 137 ++++++++++++++++++++++++
 tools/testing/selftests/net/sctp_vrf.sh  | 178 +++++++++++++++++++++++++++++++
 3 files changed, 317 insertions(+)
 create mode 100644 tools/testing/selftests/net/sctp_hello.c
 create mode 100755 tools/testing/selftests/net/sctp_vrf.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index cec4800cb017..880e6ded6ed5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -72,6 +72,8 @@ TEST_GEN_PROGS += sk_bind_sendto_listen
 TEST_GEN_PROGS += sk_connect_zero_addr
 TEST_PROGS += test_ingress_egress_chaining.sh
 TEST_GEN_PROGS += so_incoming_cpu
+TEST_PROGS += sctp_vrf.sh
+TEST_GEN_FILES += sctp_hello
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
new file mode 100644
index 000000000000..f02f1f95d227
--- /dev/null
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len)
+{
+	if (ss->ss_family == AF_INET) {
+		struct sockaddr_in *a = (struct sockaddr_in *)ss;
+
+		a->sin_addr.s_addr = inet_addr(ip);
+		a->sin_port = htons(atoi(port));
+		*len = sizeof(*a);
+	} else {
+		struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss;
+
+		a->sin6_family = AF_INET6;
+		inet_pton(AF_INET6, ip, &a->sin6_addr);
+		a->sin6_port = htons(atoi(port));
+		*len = sizeof(*a);
+	}
+}
+
+static int do_client(int argc, char *argv[])
+{
+	struct sockaddr_storage ss;
+	char buf[] = "hello";
+	int csk, ret, len;
+
+	if (argc < 5) {
+		printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]);
+		return -1;
+	}
+
+	bzero((void *)&ss, sizeof(ss));
+	ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6;
+	csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP);
+	if (csk < 0) {
+		printf("failed to create socket\n");
+		return -1;
+	}
+
+	if (argc >= 7) {
+		set_addr(&ss, argv[5], argv[6], &len);
+		ret = bind(csk, (struct sockaddr *)&ss, len);
+		if (ret < 0) {
+			printf("failed to bind to address\n");
+			return -1;
+		}
+	}
+
+	set_addr(&ss, argv[3], argv[4], &len);
+	ret = connect(csk, (struct sockaddr *)&ss, len);
+	if (ret < 0) {
+		printf("failed to connect to peer\n");
+		return -1;
+	}
+
+	ret = send(csk, buf, strlen(buf) + 1, 0);
+	if (ret < 0) {
+		printf("failed to send msg %d\n", ret);
+		return -1;
+	}
+	close(csk);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage ss;
+	int lsk, csk, ret, len;
+	char buf[20];
+
+	if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+		printf("%s server|client ...\n", argv[0]);
+		return -1;
+	}
+
+	if (!strcmp(argv[1], "client"))
+		return do_client(argc, argv);
+
+	if (argc < 5) {
+		printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]);
+		return -1;
+	}
+
+	ss.ss_family = !strcmp(argv[2], "-4") ?
AF_INET : AF_INET6; + lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (lsk < 0) { + printf("failed to create lsk\n"); + return -1; + } + + if (argc >= 6) { + ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE, + argv[5], strlen(argv[5]) + 1); + if (ret < 0) { + printf("failed to bind to device\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = bind(lsk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + + ret = listen(lsk, 5); + if (ret < 0) { + printf("failed to listen on port\n"); + return -1; + } + + csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL); + if (csk < 0) { + printf("failed to accept new client\n"); + return -1; + } + + ret = recv(csk, buf, sizeof(buf), 0); + if (ret <= 0) { + printf("failed to recv msg %d\n", ret); + return -1; + } + close(csk); + close(lsk); + + return 0; +} diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh new file mode 100755 index 000000000000..c721e952e5f3 --- /dev/null +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP VRF. +# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1 +# SERVER_NS +# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 + +CLIENT_NS1="client-ns1" +CLIENT_NS2="client-ns2" +CLIENT_IP4="10.0.0.1" +CLIENT_IP6="2000::1" +CLIENT_PORT=1234 + +SERVER_NS="server-ns" +SERVER_IP4="10.0.0.2" +SERVER_IP6="2000::2" +SERVER_PORT=1234 + +setup() { + modprobe sctp + modprobe sctp_diag + ip netns add $CLIENT_NS1 + ip netns add $CLIENT_NS2 + ip netns add $SERVER_NS + + ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + + ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 + ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 + + ip -n $CLIENT_NS1 link set veth1 up + ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $CLIENT_NS2 link set veth1 up + ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $SERVER_NS link add dummy1 type dummy + ip -n $SERVER_NS link set dummy1 up + ip -n $SERVER_NS link add vrf-1 type vrf table 10 + ip -n $SERVER_NS link add vrf-2 type vrf table 20 + ip -n $SERVER_NS link set vrf-1 up + ip -n $SERVER_NS link set vrf-2 up + ip -n $SERVER_NS link set veth1 master vrf-1 + ip -n $SERVER_NS link set veth2 master vrf-2 + + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2 + + ip -n $SERVER_NS link set veth1 up + ip -n $SERVER_NS link set veth2 up + ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4 + ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6 + ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6 + ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src 
$SERVER_IP6 +} + +cleanup() { + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns del "$CLIENT_NS1" + ip netns del "$CLIENT_NS2" + ip netns del "$SERVER_NS" +} + +wait_server() { + local IFACE=$1 + local CNT=0 + + until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ + grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do + [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +do_test() { + local CLIENT_NS=$1 + local IFACE=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE 2>&1 >/dev/null & + disown + wait_server $IFACE || return $RET + timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +do_testx() { + local IFACE1=$1 + local IFACE2=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE1 2>&1 >/dev/null & + disown + wait_server $IFACE1 || return $RET + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE2 2>&1 >/dev/null & + disown + wait_server $IFACE2 || return $RET + timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +testup() { + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " + do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " + do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 05: bind veth2 in server, connect from client 1, N " + do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 06: bind veth1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N " + do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y " + do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N " + do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y " + do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" +} + +trap cleanup EXIT +setup && echo 
"Testing For SCTP VRF:" && \ +CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" && +CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***" +exit $? -- cgit v1.2.3-58-ga151 From 97c11d6e31547183e2404087f0fb23b34dbe2cc3 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 19 Nov 2022 00:29:38 +0530 Subject: selftests/bpf: Skip spin lock failure test on s390x Instead of adding the whole test to DENYLIST.s390x, which also has success test cases that should be run, just skip over failure test cases in case the JIT does not support kfuncs. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118185938.2139616-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index 72282e92a78a..d9270bd3d920 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -68,6 +68,12 @@ static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail")) goto end; + /* Skip check if JIT does not support kfuncs */ + if (strstr(log_buf, "JIT does not support calling kernel function")) { + test__skip(); + goto end; + } + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { fprintf(stderr, "Expected: %s\n", err_msg); fprintf(stderr, "Verifier: %s\n", log_buf); -- cgit v1.2.3-58-ga151 From f80e16b614f303b520465b7c704ff89fab800f2f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 17 Nov 2022 11:28:24 -0800 Subject: libbpf: Ignore hashmap__find() result explicitly in btf_dump Coverity is reporting that btf_dump_name_dups() doesn't check return result of hashmap__find() call. This is intentional, so make it explicit with (void) cast. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221117192824.4093553-1-andrii@kernel.org --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e9f849d82124..deb2bc9a0a7b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1543,7 +1543,7 @@ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, if (!new_name) return 1; - hashmap__find(name_map, orig_name, &dup_cnt); + (void)hashmap__find(name_map, orig_name, &dup_cnt); dup_cnt++; err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL); -- cgit v1.2.3-58-ga151 From 3f00c52393445ed49aadc1a567aa502c6333b1a1 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 19 Nov 2022 23:10:02 -0600 Subject: bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs Kfuncs currently support specifying the KF_TRUSTED_ARGS flag to signal to the verifier that it should enforce that a BPF program passes it a "safe", trusted pointer. Currently, "safe" means that the pointer is either PTR_TO_CTX, or is refcounted. There may be cases, however, where the kernel passes a BPF program a safe / trusted pointer to an object that the BPF program wishes to use as a kptr, but because the object does not yet have a ref_obj_id from the perspective of the verifier, the program would be unable to pass it to a KF_ACQUIRE | KF_TRUSTED_ARGS kfunc. 
The solution is to expand the set of pointers that are considered trusted according to KF_TRUSTED_ARGS, so that programs can invoke kfuncs with these pointers without getting rejected by the verifier. There is already a PTR_UNTRUSTED flag that is set in some scenarios, such as when a BPF program reads a kptr directly from a map without performing a bpf_kptr_xchg() call. These pointers of course can and should be rejected by the verifier. Unfortunately, however, PTR_UNTRUSTED does not cover all the cases for safety that need to be addressed to adequately protect kfuncs. Specifically, pointers obtained by a BPF program "walking" a struct are _not_ considered PTR_UNTRUSTED according to BPF. For example, say that we were to add a kfunc called bpf_task_acquire(), with KF_ACQUIRE | KF_TRUSTED_ARGS, to acquire a struct task_struct *. If we only used PTR_UNTRUSTED to signal that a task was unsafe to pass to a kfunc, the verifier would mistakenly allow the following unsafe BPF program to be loaded: SEC("tp_btf/task_newtask") int BPF_PROG(unsafe_acquire_task, struct task_struct *task, u64 clone_flags) { struct task_struct *acquired, *nested; nested = task->last_wakee; /* Would not be rejected by the verifier. */ acquired = bpf_task_acquire(nested); if (!acquired) return 0; bpf_task_release(acquired); return 0; } To address this, this patch defines a new type flag called PTR_TRUSTED which tracks whether a PTR_TO_BTF_ID pointer is safe to pass to a KF_TRUSTED_ARGS kfunc or a BPF helper function. PTR_TRUSTED pointers are passed directly from the kernel as a tracepoint or struct_ops callback argument. Any nested pointer that is obtained from walking a PTR_TRUSTED pointer is no longer PTR_TRUSTED. From the example above, the struct task_struct *task argument is PTR_TRUSTED, but the 'nested' pointer obtained from 'task->last_wakee' is not PTR_TRUSTED. A subsequent patch will add kfuncs for storing a task kfunc as a kptr, and then another patch will add selftests to validate. Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221120051004.3605026-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 30 +++++----- include/linux/bpf.h | 30 ++++++++++ include/linux/bpf_verifier.h | 7 +++ include/linux/btf.h | 65 ++++++++++++-------- kernel/bpf/btf.c | 8 +++ kernel/bpf/verifier.c | 69 ++++++++++++++++++---- kernel/trace/bpf_trace.c | 2 +- net/ipv4/bpf_tcp_ca.c | 4 +- tools/testing/selftests/bpf/verifier/calls.c | 2 +- .../testing/selftests/bpf/verifier/ref_tracking.c | 4 +- 10 files changed, 164 insertions(+), 57 deletions(-) (limited to 'tools') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 3b1501c3b6cd..90774479ab7a 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -161,22 +161,20 @@ KF_ACQUIRE and KF_RET_NULL flags. -------------------------- The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It -indicates that the all pointer arguments will always have a guaranteed lifetime, -and pointers to kernel objects are always passed to helpers in their unmodified -form (as obtained from acquire kfuncs). - -It can be used to enforce that a pointer to a refcounted object acquired from a -kfunc or BPF helper is passed as an argument to this kfunc without any -modifications (e.g. pointer arithmetic) such that it is trusted and points to -the original object. - -Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs, -but those can have a non-zero offset. 
-
-This flag is often used for kfuncs that operate (change some property, perform
-some operation) on an object that was obtained using an acquire kfunc. Such
-kfuncs need an unchanged pointer to ensure the integrity of the operation being
-performed on the expected object.
+indicates that all pointer arguments are valid, and that all pointers to
+BTF objects have been passed in their unmodified form (that is, at a zero
+offset, and without having been obtained from walking another pointer).
+
+There are two types of pointers to kernel objects which are considered "valid":
+
+1. Pointers which are passed as tracepoint or struct_ops callback arguments.
+2. Pointers which were returned from a KF_ACQUIRE or KF_KPTR_GET kfunc.
+
+Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
+KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
+
+The definition of "valid" pointers is subject to change at any time, and has
+absolutely no ABI stability guarantees.
 
 2.4.6 KF_SLEEPABLE flag
 -----------------------
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8b32376ce746..c9eafa67f2a2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -543,6 +543,35 @@ enum bpf_type_flag {
 	 */
 	MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS),
 
+	/* PTR was passed from the kernel in a trusted context, and may be
+	 * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions.
+	 * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above.
+	 * PTR_UNTRUSTED refers to a kptr that was read directly from a map
+	 * without invoking bpf_kptr_xchg(). What we really need to know is
+	 * whether a pointer is safe to pass to a kfunc or BPF helper function.
+	 * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF
+	 * helpers, they do not cover all possible instances of unsafe
+	 * pointers. For example, a pointer that was obtained from walking a
+	 * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the
+	 * fact that it may be NULL, invalid, etc. This is due to backwards
+	 * compatibility requirements, as this was the behavior that was first
+	 * introduced when kptrs were added. The behavior is now considered
+	 * deprecated, and PTR_UNTRUSTED will eventually be removed.
+	 *
+	 * PTR_TRUSTED, on the other hand, is a pointer that the kernel
+	 * guarantees to be valid and safe to pass to kfuncs and BPF helpers.
+	 * For example, pointers passed to tracepoint arguments are considered
+	 * PTR_TRUSTED, as are pointers that are passed to struct_ops
+	 * callbacks. As alluded to above, pointers that are obtained from
+	 * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a
+	 * struct task_struct *task is PTR_TRUSTED, then accessing
+	 * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored
+	 * in a BPF register. Similarly, pointers passed to certain program
+	 * types such as kretprobes are not guaranteed to be valid, as they may
+	 * for example contain an object that was recently freed.
+	 */
+	PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS),
+
 	__BPF_TYPE_FLAG_MAX,
 	__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
 };
@@ -636,6 +665,7 @@ enum bpf_return_type {
 	RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM,
 	RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM,
 	RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+	RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID,
 
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 608dde740fef..545152ac136c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -680,4 +680,11 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) + +static inline bool bpf_type_has_unsafe_modifiers(u32 type) +{ + return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index d5b26380a60f..d38aa4251c28 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -19,36 +19,53 @@ #define KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ #define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ -/* Trusted arguments are those which are meant to be referenced arguments with - * unchanged offset. It is used to enforce that pointers obtained from acquire - * kfuncs remain unmodified when being passed to helpers taking trusted args. +/* Trusted arguments are those which are guaranteed to be valid when passed to + * the kfunc. It is used to enforce that pointers obtained from either acquire + * kfuncs, or from the main kernel on a tracepoint or struct_ops callback + * invocation, remain unmodified when being passed to helpers taking trusted + * args. * - * Consider - * struct foo { - * int data; - * struct foo *next; - * }; + * Consider, for example, the following new task tracepoint: * - * struct bar { - * int data; - * struct foo f; - * }; + * SEC("tp_btf/task_newtask") + * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags) + * { + * ... + * } * - * struct foo *f = alloc_foo(); // Acquire kfunc - * struct bar *b = alloc_bar(); // Acquire kfunc + * And the following kfunc: * - * If a kfunc set_foo_data() wants to operate only on the allocated object, it - * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) * - * set_foo_data(f, 42); // Allowed - * set_foo_data(f->next, 42); // Rejected, non-referenced pointer - * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type - * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * All invocations to the kfunc must pass the unmodified, unwalked task: * - * In the final case, usually for the purposes of type matching, it is deduced - * by looking at the type of the member at the offset, but due to the - * requirement of trusted argument, this deduction will be strict and not done - * for this case. + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(task->last_wakee); // Rejected, walked task + * + * Programs may also pass referenced tasks directly to the kfunc: + * + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(task); // Allowed, same as above + * bpf_task_acquire(acquired); // Allowed + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task + * + * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or + * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these + * pointers are guaranteed to be safe. 
For example, the following BPF program + * would be rejected: + * + * SEC("kretprobe/free_task") + * int BPF_PROG(free_task_probe, struct task_struct *tsk) + * { + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(tsk); // Rejected, not a trusted pointer + * bpf_task_release(acquired); + * + * return 0; + * } */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f7d5fab61535..d52054ec69c9 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5799,6 +5799,11 @@ static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, return nr_args + 1; } +static bool prog_type_args_trusted(enum bpf_prog_type prog_type) +{ + return prog_type == BPF_PROG_TYPE_TRACING || prog_type == BPF_PROG_TYPE_STRUCT_OPS; +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -5942,6 +5947,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } info->reg_type = PTR_TO_BTF_ID; + if (prog_type_args_trusted(prog->type)) + info->reg_type |= PTR_TRUSTED; + if (tgt_prog) { enum bpf_prog_type tgt_type; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 67a6f11d953c..5bc9d84d7924 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -589,12 +589,13 @@ static const char *reg_type_str(struct bpf_verifier_env *env, strncpy(postfix, "_or_null", 16); } - snprintf(prefix, sizeof(prefix), "%s%s%s%s%s", + snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s", type & MEM_RDONLY ? "rdonly_" : "", type & MEM_RINGBUF ? "ringbuf_" : "", type & MEM_USER ? "user_" : "", type & MEM_PERCPU ? "percpu_" : "", - type & PTR_UNTRUSTED ? "untrusted_" : "" + type & PTR_UNTRUSTED ? "untrusted_" : "", + type & PTR_TRUSTED ? "trusted_" : "" ); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", @@ -3856,7 +3857,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno) { const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); - int perm_flags = PTR_MAYBE_NULL; + int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED; const char *reg_name = ""; /* Only unreferenced case accepts untrusted pointers */ @@ -4732,6 +4733,9 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (type_flag(reg->type) & PTR_UNTRUSTED) flag |= PTR_UNTRUSTED; + /* Any pointer obtained from walking a trusted pointer is no longer trusted.
*/ + flag &= ~PTR_TRUSTED; + if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); @@ -5844,6 +5848,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = { PTR_TO_TCP_SOCK, PTR_TO_XDP_SOCK, PTR_TO_BTF_ID, + PTR_TO_BTF_ID | PTR_TRUSTED, }, .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], }; @@ -5884,8 +5889,18 @@ static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; -static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; -static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } }; +static const struct bpf_reg_types btf_ptr_types = { + .types = { + PTR_TO_BTF_ID, + PTR_TO_BTF_ID | PTR_TRUSTED, + }, +}; +static const struct bpf_reg_types percpu_btf_ptr_types = { + .types = { + PTR_TO_BTF_ID | MEM_PERCPU, + PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, + } +}; static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5973,7 +5988,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, return -EACCES; found: - if (reg->type == PTR_TO_BTF_ID) { + if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) { /* For bpf_sk_release, it needs to match against first member * 'struct sock_common', hence make an exception for it. This * allows bpf_sk_release to work for multiple socket types. @@ -6055,6 +6070,8 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, */ case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: /* When referenced PTR_TO_BTF_ID is passed to release function, * it's fixed offset must be 0. In the other cases, fixed offset * can be non-zero. @@ -7939,6 +7956,25 @@ static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg) return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET); } +static bool is_trusted_reg(const struct bpf_reg_state *reg) +{ + /* A referenced register is always trusted. */ + if (reg->ref_obj_id) + return true; + + /* If a register is not referenced, it is trusted if it has either the + * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the + * other type modifiers may be safe, but we elect to take an opt-in + * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are + * not. + * + * Eventually, we should make PTR_TRUSTED the single source of truth + * for whether a register is trusted. 
+ */ + return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS && + !bpf_type_has_unsafe_modifiers(reg->type); +} + static bool __kfunc_param_match_suffix(const struct btf *btf, const struct btf_param *arg, const char *suffix) @@ -8220,7 +8256,7 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, const char *reg_ref_tname; u32 reg_ref_id; - if (reg->type == PTR_TO_BTF_ID) { + if (base_type(reg->type) == PTR_TO_BTF_ID) { reg_btf = reg->btf; reg_ref_id = reg->btf_id; } else { @@ -8366,6 +8402,7 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ ptr = reg->map_ptr; break; case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: ptr = reg->btf; break; default: @@ -8596,8 +8633,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_BTF_ID: if (!is_kfunc_trusted_args(meta)) break; - if (!reg->ref_obj_id) { - verbose(env, "R%d must be referenced\n", regno); + + if (!is_trusted_reg(reg)) { + verbose(env, "R%d must be referenced or trusted\n", regno); return -EINVAL; } fallthrough; @@ -8702,9 +8740,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_BTF_ID: /* Only base_type is checked, further checks are done here */ - if (reg->type != PTR_TO_BTF_ID && - (!reg2btf_ids[base_type(reg->type)] || type_flag(reg->type))) { - verbose(env, "arg#%d expected pointer to btf or socket\n", i); + if ((base_type(reg->type) != PTR_TO_BTF_ID || + bpf_type_has_unsafe_modifiers(reg->type)) && + !reg2btf_ids[base_type(reg->type)]) { + verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type)); + verbose(env, "expected %s or socket\n", + reg_type_str(env, base_type(reg->type) | + (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); return -EINVAL; } ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); @@ -14713,6 +14755,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) break; case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_UNTRUSTED: + case PTR_TO_BTF_ID | PTR_TRUSTED: /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot * be said once it is marked PTR_UNTRUSTED, hence we must handle @@ -14720,6 +14763,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) * for this case. 
*/ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED: + case PTR_TO_BTF_ID | PTR_UNTRUSTED | PTR_TRUSTED: + case PTR_TO_BTF_ID | PTR_UNTRUSTED | MEM_ALLOC | PTR_TRUSTED: if (type == BPF_READ) { insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f2d8d070d024..5b9008bc597b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -774,7 +774,7 @@ BPF_CALL_0(bpf_get_current_task_btf) const struct bpf_func_proto bpf_get_current_task_btf_proto = { .func = bpf_get_current_task_btf, .gpl_only = true, - .ret_type = RET_PTR_TO_BTF_ID, + .ret_type = RET_PTR_TO_BTF_ID_TRUSTED, .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index d15c91de995f..4517d2bd186a 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -61,7 +61,9 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info)) return false; - if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id) + if (base_type(info->reg_type) == PTR_TO_BTF_ID && + !bpf_type_has_unsafe_modifiers(info->reg_type) && + info->btf_id == sock_id) /* promote it to tcp_sock */ info->btf_id = tcp_sock_id; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 86d6fef2e3b4..3193915c5ee6 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -109,7 +109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 expected pointer to btf or socket", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 55cba01c99d5..9540164712b7 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -142,7 +142,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 expected pointer to btf or socket", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_user_key", 2 }, { "bpf_key_put", 4 }, @@ -163,7 +163,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 expected pointer to btf or socket", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_system_key", 1 }, { "bpf_key_put", 3 }, -- cgit v1.2.3-58-ga151 From fe147956fca4604b920e6be652abc9bea8ce8952 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Sat, 19 Nov 2022 23:10:04 -0600 Subject: bpf/selftests: Add selftests for new task kfuncs A previous change added a series of kfuncs for storing struct task_struct objects as referenced kptrs. This patch adds a new task_kfunc test suite for validating their expected behavior. 
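Assuming the usual BPF selftest workflow (not spelled out in the patch itself), the suite can be exercised on its own by building tools/testing/selftests/bpf with make and then running sudo ./test_progs -t task_kfunc, which walks both the success and failure programs listed below.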
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221120051004.3605026-5-void@manifault.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../testing/selftests/bpf/prog_tests/task_kfunc.c | 159 +++++++++++++ .../selftests/bpf/progs/task_kfunc_common.h | 71 ++++++ .../selftests/bpf/progs/task_kfunc_failure.c | 260 +++++++++++++++++++++ .../selftests/bpf/progs/task_kfunc_success.c | 149 ++++++++++++ 5 files changed, 640 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/task_kfunc.c create mode 100644 tools/testing/selftests/bpf/progs/task_kfunc_common.h create mode 100644 tools/testing/selftests/bpf/progs/task_kfunc_failure.c create mode 100644 tools/testing/selftests/bpf/progs/task_kfunc_success.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 072243af93b0..f70a677b38e5 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -54,6 +54,7 @@ skc_to_unix_sock # could not attach BPF object unexpecte socket_cookie # prog_attach unexpected error: -524 (trampoline) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_kfunc # JIT does not support calling kernel function task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) test_bpffs # bpffs test failed 255 (iterator) test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c new file mode 100644 index 000000000000..4994fe6092cc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#define _GNU_SOURCE +#include +#include +#include + +#include "task_kfunc_failure.skel.h" +#include "task_kfunc_success.skel.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct task_kfunc_success *open_load_task_kfunc_skel(void) +{ + struct task_kfunc_success *skel; + int err; + + skel = task_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = task_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + task_kfunc_success__destroy(skel); + return NULL; +} + +static void run_success_test(const char *prog_name) +{ + struct task_kfunc_success *skel; + int status; + pid_t child_pid; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_task_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_spawn_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + if (child_pid == 0) + _exit(0); + waitpid(child_pid, &status, 0); + + ASSERT_OK(skel->bss->err, "post_wait_err"); + +cleanup: + bpf_link__destroy(link); + task_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_task_acquire_release_argument", + "test_task_acquire_release_current", + "test_task_acquire_leave_in_map", + "test_task_xchg_release", + "test_task_get_release", + "test_task_current_acquire_release", +}; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + {"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"}, + {"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"}, + {"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"}, + {"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_acquire_unreleased", "Unreleased reference"}, + {"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"}, + {"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"}, + {"task_kfunc_get_null", "arg#0 expected pointer to map value"}, + {"task_kfunc_xchg_unreleased", "Unreleased reference"}, + {"task_kfunc_get_unreleased", "Unreleased reference"}, + {"task_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct task_kfunc_failure *skel; + int err, i; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = task_kfunc_failure__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + struct bpf_program *prog; + const char *curr_name = 
failure_tests[i].prog_name; + + prog = bpf_object__find_program_by_name(skel->obj, curr_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name)); + } + + err = task_kfunc_failure__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + task_kfunc_failure__destroy(skel); +} + +void test_task_kfunc(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h new file mode 100644 index 000000000000..160d6dde00be --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _TASK_KFUNC_COMMON_H +#define _TASK_KFUNC_COMMON_H + +#include +#include +#include +#include + +struct __tasks_kfunc_map_value { + struct task_struct __kptr_ref * task; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __tasks_kfunc_map_value); + __uint(max_entries, 1); +} __tasks_kfunc_map SEC(".maps"); + +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p) +{ + s32 pid; + long status; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); +} + +static inline int tasks_kfunc_map_insert(struct task_struct *p) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return status; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) { + bpf_map_delete_elem(&__tasks_kfunc_map, &pid); + return -ENOENT; + } + + acquired = bpf_task_acquire(p); + old = bpf_kptr_xchg(&v->task, acquired); + if (old) { + bpf_task_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _TASK_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c new file mode 100644 index 000000000000..93e934ddfcb6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task) +{ + int status; + + status = tasks_kfunc_map_insert(task); + if (status) + return NULL; + + return tasks_kfunc_map_value_lookup(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_acquire() on an untrusted pointer. */ + acquired = bpf_task_acquire(v->task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags; + + /* Can't invoke bpf_task_acquire() on a random frame pointer. */ + acquired = bpf_task_acquire((struct task_struct *)&stack_task); + bpf_task_release(acquired); + + return 0; +} + +SEC("kretprobe/free_task") +int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + /* Can't release a bpf_task_acquire()'d task without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */ + acquired = bpf_task_acquire(task->last_wakee); + bpf_task_release(acquired); + + return 0; +} + + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a NULL pointer. */ + acquired = bpf_task_acquire(NULL); + if (!acquired) + return 0; + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + + /* Acquired task is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */ + kptr = bpf_task_kptr_get(&task); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr, *acquired; + + acquired = bpf_task_acquire(task); + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */ + kptr = bpf_task_kptr_get(&acquired); + bpf_task_release(acquired); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a NULL pointer. 
*/ + kptr = bpf_task_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_task_kptr_get(&v->task); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_release() on an untrusted pointer. */ + bpf_task_release(v->task); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired = (struct task_struct *)&clone_flags; + + /* Cannot release random frame pointer. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid); + if (status) + return 0; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) + return -ENOENT; + + acquired = bpf_task_acquire(task); + + old = bpf_kptr_xchg(&v->task, acquired); + + /* old cannot be passed to bpf_task_release() without a NULL check. */ + bpf_task_release(old); + bpf_task_release(old); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags) +{ + /* Cannot release trusted task pointer which was not acquired. */ + bpf_task_release(task); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c new file mode 100644 index 000000000000..be4534b5ba2e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + return pid == cur_pid; +} + +static int test_acquire_release(struct task_struct *task) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(bpf_get_current_task_btf()); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_task_kptr_get(&v->task); + if (!kptr) { + err = 3; + return 0; + } + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + current = bpf_get_current_task_btf(); + acquired = bpf_task_acquire(current); + bpf_task_release(acquired); + + return 0; +} -- cgit v1.2.3-58-ga151 From 58d84bee58465cc9f6a63ca3931240419497f917 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 20 Nov 2022 11:54:42 -0800 Subject: bpf: Add type cast unit tests Three tests are added. One is from John Fastabend ([1]) which tests tracing style access for xdp program from the kernel ctx. Another is a tc test to test both kernel ctx tracing style access and explicit non-ctx type cast. The third one is for negative tests including two tests, a tp_btf test where the bpf_rdonly_cast() returns an untrusted ptr which cannot be used as helper argument, and a tracepoint test where the kernel ctx is a u64. Also added the test to DENYLIST.s390x since s390 does not currently support calling kernel functions in JIT mode.
[1] https://lore.kernel.org/bpf/20221109215242.1279993-1-john.fastabend@gmail.com/ Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221120195442.3114844-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + tools/testing/selftests/bpf/prog_tests/type_cast.c | 114 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/type_cast.c | 83 +++++++++++++++ 3 files changed, 198 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/type_cast.c create mode 100644 tools/testing/selftests/bpf/progs/type_cast.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index f70a677b38e5..12cf2159975e 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -71,6 +71,7 @@ trace_printk # trace_printk__load unexpected error: trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) tracing_struct # failed to auto-attach: -524 (trampoline) trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +type_cast # JIT does not support calling kernel function unpriv_bpf_disabled # fentry user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c new file mode 100644 index 000000000000..9317d5fa2635 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "type_cast.skel.h" + +static void test_xdp(void) +{ + struct type_cast *skel; + int err, prog_fd; + char buf[128]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_xdp, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_xdp); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval"); + + ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex"); + ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex"); + ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name"); + ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum"); + +out: + type_cast__destroy(skel); +} + +static void test_tc(void) +{ + struct type_cast *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_skb, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_skb); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "tc test_run retval"); + + ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len"); + ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len"); + ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len"); + ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len"); + 
ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare"); + +out: + type_cast__destroy(skel); +} + +static const char * const negative_tests[] = { + "untrusted_ptr", + "kctx_u64", +}; + +static void test_negative(void) +{ + struct bpf_program *prog; + struct type_cast *skel; + int i, err; + + for (i = 0; i < ARRAY_SIZE(negative_tests); i++) { + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = type_cast__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + type_cast__destroy(skel); + } +} + +void test_type_cast(void) +{ + if (test__start_subtest("xdp")) + test_xdp(); + if (test__start_subtest("tc")) + test_tc(); + if (test__start_subtest("negative")) + test_negative(); +} diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c new file mode 100644 index 000000000000..eb78e6f03129 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "vmlinux.h" +#include +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} enter_id SEC(".maps"); + +#define IFNAMSIZ 16 + +int ifindex, ingress_ifindex; +char name[IFNAMSIZ]; +unsigned int inum; +unsigned int meta_len, frag0_len, kskb_len, kskb2_len; + +void *bpf_cast_to_kern_ctx(void *) __ksym; +void *bpf_rdonly_cast(void *, __u32) __ksym; + +SEC("?xdp") +int md_xdp(struct xdp_md *ctx) +{ + struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx); + struct net_device *dev; + + dev = kctx->rxq->dev; + ifindex = dev->ifindex; + inum = dev->nd_net.net->ns.inum; + __builtin_memcpy(name, dev->name, IFNAMSIZ); + ingress_ifindex = ctx->ingress_ifindex; + return XDP_PASS; +} + +SEC("?tc") +int md_skb(struct __sk_buff *skb) +{ + struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb); + struct skb_shared_info *shared_info; + struct sk_buff *kskb2; + + kskb_len = kskb->len; + + /* Simulate the following kernel macro: + * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) + */ + shared_info = bpf_rdonly_cast(kskb->head + kskb->end, + bpf_core_type_id_kernel(struct skb_shared_info)); + meta_len = shared_info->meta_len; + frag0_len = shared_info->frag_list->len; + + /* kskb2 should be equal to kskb */ + kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff)); + kskb2_len = kskb2->len; + return 0; +} + +SEC("?tp_btf/sys_enter") +int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) +{ + struct task_struct *task, *task_dup; + long *ptr; + + task = bpf_get_current_task_btf(); + task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); + (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0); + return 0; +} + +SEC("?tracepoint/syscalls/sys_enter_nanosleep") +int kctx_u64(void *ctx) +{ + u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64)); + + (void)kctx; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-58-ga151 From 9b8107553424fd87955fed257a807672c2097297 Mon Sep 17 00:00:00 2001 From: Sahid Orentino Ferdjaoui Date: Sun, 20 Nov 2022 11:25:55 +0000 Subject: bpftool: remove support of --legacy option for bpftool Following: commit bd054102a8c7 ("libbpf: enforce 
strict libbpf 1.0 behaviors") commit 93b8952d223a ("libbpf: deprecate legacy BPF map definitions") The --legacy option is no longer relevant as libbpf no longer supports it. libbpf_set_strict_mode() is a no-op operation. Signed-off-by: Sahid Orentino Ferdjaoui Acked-by: Yonghong Song Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20221120112515.38165-2-sahid.ferdjaoui@industrialdiscipline.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/common_options.rst | 9 --------- tools/bpf/bpftool/Documentation/substitutions.rst | 2 +- tools/bpf/bpftool/bash-completion/bpftool | 2 +- tools/bpf/bpftool/main.c | 15 --------------- tools/bpf/bpftool/main.h | 3 +-- tools/bpf/bpftool/prog.c | 5 ----- tools/testing/selftests/bpf/test_bpftool_synctypes.py | 6 +++--- 7 files changed, 6 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 05350a1aadf9..30df7a707f02 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -23,12 +23,3 @@ Print all logs available, even debug-level information. This includes logs from libbpf as well as from the verifier, when attempting to load programs. - --l, --legacy - Use legacy libbpf mode which has more relaxed BPF program - requirements. By default, bpftool has more strict requirements - about section names, changes pinning logic and doesn't support - some of the older non-BTF map declarations. - - See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 - for details. diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst index ccf1ffa0686c..827e3ffb1766 100644 --- a/tools/bpf/bpftool/Documentation/substitutions.rst +++ b/tools/bpf/bpftool/Documentation/substitutions.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** } +.. 
|COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 2957b42cab67..35f26f7c1124 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -261,7 +261,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf --legacy' + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 337ab7977ea4..c53c553adcac 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,7 +31,6 @@ bool block_mount; bool verifier_logs; bool relaxed_maps; bool use_loader; -bool legacy_libbpf; struct btf *base_btf; struct hashmap *refs_table; @@ -160,7 +159,6 @@ static int do_version(int argc, char **argv) jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); jsonw_bool_field(json_wtr, "llvm", has_llvm); - jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); jsonw_bool_field(json_wtr, "bootstrap", bootstrap); jsonw_end_object(json_wtr); /* features */ @@ -179,7 +177,6 @@ static int do_version(int argc, char **argv) printf("features:"); print_feature("libbfd", has_libbfd, &nb_features); print_feature("llvm", has_llvm, &nb_features); - print_feature("libbpf_strict", !legacy_libbpf, &nb_features); print_feature("skeletons", has_skeletons, &nb_features); print_feature("bootstrap", bootstrap, &nb_features); printf("\n"); @@ -451,7 +448,6 @@ int main(int argc, char **argv) { "debug", no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, - { "legacy", no_argument, NULL, 'l' }, { 0 } }; bool version_requested = false; @@ -524,9 +520,6 @@ int main(int argc, char **argv) case 'L': use_loader = true; break; - case 'l': - legacy_libbpf = true; - break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -536,14 +529,6 @@ int main(int argc, char **argv) } } - if (!legacy_libbpf) { - /* Allow legacy map definitions for skeleton generation. - * It will still be rejected if users use LIBBPF_STRICT_ALL - * mode for loading generated skeleton. 
- */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - } - argc -= optind; argv += optind; if (argc < 0) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d4e8a1aef787..a84224b6a604 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ @@ -82,7 +82,6 @@ extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; -extern bool legacy_libbpf; extern struct btf *base_btf; extern struct hashmap *refs_table; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9d32ffb9f22e..2266958f203f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1802,11 +1802,6 @@ err_unpin: else bpf_object__unpin_programs(obj, pinfile); err_close_obj: - if (!legacy_libbpf) { - p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" - "If it used to work for this object file but now doesn't, see --legacy option for more details.\n"); - } - bpf_object__close(obj); err_free_reuse_maps: for (i = 0; i < old_map_fds; i++) diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 9fe4c9336c6f..0cfece7ff4f8 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -309,11 +309,11 @@ class MainHeaderFileExtractor(SourceFileExtractor): commands), which looks to the lists of options in other source files but has different start and end markers: - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile(f'"OPTIONS :=') pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') @@ -336,7 +336,7 @@ class ManSubstitutionsExtractor(SourceFileExtractor): Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') pattern = re.compile('\*\*([\w/-]+)\*\*') -- cgit v1.2.3-58-ga151 From 989f285159b84831c54d277112dcf803da51e722 Mon Sep 17 00:00:00 2001 From: Sahid Orentino Ferdjaoui Date: Sun, 20 Nov 2022 11:26:07 +0000 Subject: bpftool: replace return value PTR_ERR(NULL) with 0 There is no reason to keep PTR_ERR() when kern_btf=NULL, let's just return 0. This also cleans this part of code from using libbpf_get_error().
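The reasoning, condensed into a sketch (an illustration, not the diff itself): under libbpf 1.0 semantics get_btf_vmlinux() can only return a valid pointer or NULL, and PTR_ERR(NULL) evaluates to 0, so the old error path already returned 0, just in a roundabout way:

	kern_btf = get_btf_vmlinux();

	/* before: when kern_btf is NULL, PTR_ERR(NULL) == 0 anyway */
	if (libbpf_get_error(kern_btf))
		return PTR_ERR(kern_btf);

	/* after: say what is meant */
	if (!kern_btf)
		return 0;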
Signed-off-by: Sahid Orentino Ferdjaoui Acked-by: Yonghong Song Suggested-by: Quentin Monnet Link: https://lore.kernel.org/r/20221120112515.38165-3-sahid.ferdjaoui@industrialdiscipline.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/struct_ops.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index e08a6ff2866c..d3cfdfef9b58 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -63,10 +63,8 @@ static __s32 get_map_info_type_id(void) return map_info_type_id; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) { - map_info_type_id = PTR_ERR(kern_btf); - return map_info_type_id; - } + if (!kern_btf) + return 0; map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info", BTF_KIND_STRUCT); -- cgit v1.2.3-58-ga151 From d2973ffd25c2295055349212ca26c18929e5e9f5 Mon Sep 17 00:00:00 2001 From: Sahid Orentino Ferdjaoui Date: Sun, 20 Nov 2022 11:26:18 +0000 Subject: bpftool: fix error message when function can't register struct_ops It is expected that errno be passed to strerror(). This also cleans this part of code from using libbpf_get_error(). Signed-off-by: Sahid Orentino Ferdjaoui Acked-by: Yonghong Song Suggested-by: Quentin Monnet Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20221120112515.38165-4-sahid.ferdjaoui@industrialdiscipline.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/struct_ops.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index d3cfdfef9b58..ae9ad85f5cda 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -511,10 +511,9 @@ static int do_register(int argc, char **argv) continue; link = bpf_map__attach_struct_ops(map); - if (libbpf_get_error(link)) { + if (!link) { p_err("can't register struct_ops %s: %s", - bpf_map__name(map), - strerror(-PTR_ERR(link))); + bpf_map__name(map), strerror(errno)); nr_errs++; continue; } -- cgit v1.2.3-58-ga151 From d1313e01271d2d8f33d6c82f1afb77e820a3540d Mon Sep 17 00:00:00 2001 From: Sahid Orentino Ferdjaoui Date: Sun, 20 Nov 2022 11:26:32 +0000 Subject: bpftool: clean-up usage of libbpf_get_error() bpftool is now totally compliant with libbpf 1.0 mode and is not expected to be compiled with pre-1.0, let's clean-up the usage of libbpf_get_error(). The changes stay aligned with returned errors always negative. - In tools/bpf/bpftool/btf.c This fixes an uninitialized local variable `err` in function do_dump() because it may now be returned without having been set. - This also removes the checks on NULL pointers before calling btf__free() because that function already does the check. 
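The recurring shape of the conversion, condensed for illustration (assuming libbpf 1.0 semantics, where pointer-returning APIs return NULL with the detail in errno, and btf__free(NULL) is a safe no-op):

	int err = 0;	/* initialized: some paths return it before any failure sets it */
	struct btf *btf;

	btf = btf__parse_split(path, base_btf);
	if (!btf) {
		err = -errno;	/* capture errno before later calls can clobber it */
		p_err("failed to load BTF from %s: %s", path, strerror(errno));
		goto done;
	}

	/* ... work with btf ... */
done:
	btf__free(btf);	/* no NULL guard needed */
	return err;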
Signed-off-by: Sahid Orentino Ferdjaoui Link: https://lore.kernel.org/r/20221120112515.38165-5-sahid.ferdjaoui@industrialdiscipline.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf.c | 19 ++++++++----------- tools/bpf/bpftool/btf_dumper.c | 2 +- tools/bpf/bpftool/gen.c | 10 ++++------ tools/bpf/bpftool/iter.c | 10 ++++++---- tools/bpf/bpftool/main.c | 7 +++---- tools/bpf/bpftool/map.c | 15 +++++++-------- tools/bpf/bpftool/prog.c | 10 +++++----- tools/bpf/bpftool/struct_ops.c | 11 +++++------ 8 files changed, 39 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index b87e4a7fd689..352290ba7b29 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -467,9 +467,8 @@ static int dump_btf_c(const struct btf *btf, int err = 0, i; d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + if (!d) + return -errno; printf("#ifndef __VMLINUX_H__\n"); printf("#define __VMLINUX_H__\n"); @@ -512,11 +511,9 @@ static struct btf *get_vmlinux_btf_from_sysfs(void) struct btf *base; base = btf__parse(sysfs_vmlinux, NULL); - if (libbpf_get_error(base)) { - p_err("failed to parse vmlinux BTF at '%s': %ld\n", - sysfs_vmlinux, libbpf_get_error(base)); - base = NULL; - } + if (!base) + p_err("failed to parse vmlinux BTF at '%s': %d\n", + sysfs_vmlinux, -errno); return base; } @@ -559,7 +556,7 @@ static int do_dump(int argc, char **argv) __u32 btf_id = -1; const char *src; int fd = -1; - int err; + int err = 0; if (!REQ_ARGS(2)) { usage(); @@ -634,8 +631,8 @@ static int do_dump(int argc, char **argv) base = get_vmlinux_btf_from_sysfs(); btf = btf__parse_split(*argv, base ?: base_btf); - err = libbpf_get_error(btf); if (!btf) { + err = -errno; p_err("failed to load BTF from %s: %s", *argv, strerror(errno)); goto done; @@ -681,8 +678,8 @@ static int do_dump(int argc, char **argv) } btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); - err = libbpf_get_error(btf); if (!btf) { + err = -errno; p_err("get btf by id (%u): %s", btf_id, strerror(errno)); goto done; } diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 19924b6ce796..eda71fdfe95a 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -75,7 +75,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, goto print; prog_btf = btf__load_from_kernel_by_id(info.btf_id); - if (libbpf_get_error(prog_btf)) + if (!prog_btf) goto print; func_type = btf__type_by_id(prog_btf, finfo.type_id); if (!func_type || !btf_is_func(func_type)) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 01bb8d8f5568..2883660d6b67 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -252,9 +252,8 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) int err = 0; d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + if (!d) + return -errno; bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ @@ -976,13 +975,12 @@ static int do_skeleton(int argc, char **argv) /* log_level1 + log_level2 + stats, but not stable UAPI */ opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_mem(obj_data, file_sz, &opts); - err = libbpf_get_error(obj); - if (err) { + if (!obj) { char err_buf[256]; + err = -errno; libbpf_strerror(err, err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); - obj = 
NULL; goto out; } diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index a3e6b167153d..9a1d2365a297 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -4,6 +4,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include #include #include #include @@ -48,8 +49,8 @@ static int do_pin(int argc, char **argv) } obj = bpf_object__open(objfile); - err = libbpf_get_error(obj); - if (err) { + if (!obj) { + err = -errno; p_err("can't open objfile %s", objfile); goto close_map_fd; } @@ -62,13 +63,14 @@ static int do_pin(int argc, char **argv) prog = bpf_object__next_program(obj, NULL); if (!prog) { + err = -errno; p_err("can't find bpf program in objfile %s", objfile); goto close_obj; } link = bpf_program__attach_iter(prog, &iter_opts); - err = libbpf_get_error(link); - if (err) { + if (!link) { + err = -errno; p_err("attach_iter failed for program %s", bpf_program__name(prog)); goto close_obj; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index c53c553adcac..08d0ac543c67 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -510,10 +510,9 @@ int main(int argc, char **argv) break; case 'B': base_btf = btf__parse(optarg, NULL); - if (libbpf_get_error(base_btf)) { - p_err("failed to parse base BTF at '%s': %ld\n", - optarg, libbpf_get_error(base_btf)); - base_btf = NULL; + if (!base_btf) { + p_err("failed to parse base BTF at '%s': %d\n", + optarg, -errno); return -1; } break; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index d884070a2314..eb362bd3d2c9 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -786,18 +786,18 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - err = libbpf_get_error(btf_vmlinux); - if (err) { + if (!btf_vmlinux) { p_err("failed to get kernel btf"); - return err; + return -errno; } } *btf = btf_vmlinux; } else if (info->btf_value_type_id) { *btf = btf__load_from_kernel_by_id(info->btf_id); - err = libbpf_get_error(*btf); - if (err) + if (!*btf) { + err = -errno; p_err("failed to get btf"); + } } else { *btf = NULL; } @@ -807,14 +807,13 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) static void free_map_kv_btf(struct btf *btf) { - if (!libbpf_get_error(btf) && btf != btf_vmlinux) + if (btf != btf_vmlinux) btf__free(btf); } static void free_btf_vmlinux(void) { - if (!libbpf_get_error(btf_vmlinux)) - btf__free(btf_vmlinux); + btf__free(btf_vmlinux); } static int diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2266958f203f..cfc9fdc1e863 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -322,7 +322,7 @@ static void show_prog_metadata(int fd, __u32 num_maps) return; btf = btf__load_from_kernel_by_id(map_info.btf_id); - if (libbpf_get_error(btf)) + if (!btf) goto out_free; t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); @@ -726,7 +726,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (info->btf_id) { btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { + if (!btf) { p_err("failed to get btf"); return -1; } @@ -1663,7 +1663,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) { + if (!obj) { p_err("failed to open object file"); goto err_free_reuse_maps; } @@ -1882,7 +1882,7 
@@ static int do_loader(int argc, char **argv) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) { + if (!obj) { p_err("failed to open object file"); goto err_close_obj; } @@ -2199,7 +2199,7 @@ static char *profile_target_name(int tgt_fd) } btf = btf__load_from_kernel_by_id(info.btf_id); - if (libbpf_get_error(btf)) { + if (!btf) { p_err("failed to load btf for prog FD %d", tgt_fd); goto out; } diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index ae9ad85f5cda..903b80ff4e9a 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void) return btf_vmlinux; btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + if (!btf_vmlinux) p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); return btf_vmlinux; @@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) const char *st_ops_name; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (!kern_btf) return ""; t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); @@ -413,7 +413,7 @@ static int do_dump(int argc, char **argv) } kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (!kern_btf) return -1; if (!json_output) { @@ -496,7 +496,7 @@ static int do_register(int argc, char **argv) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) + if (!obj) return -1; set_max_rlimit(); @@ -590,8 +590,7 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - if (!libbpf_get_error(btf_vmlinux)) - btf__free(btf_vmlinux); + btf__free(btf_vmlinux); return err; } -- cgit v1.2.3-58-ga151 From 52df1a8aabadeba1e4c2fe157784637ddec76301 Mon Sep 17 00:00:00 2001 From: Sahid Orentino Ferdjaoui Date: Sun, 20 Nov 2022 11:26:43 +0000 Subject: bpftool: remove function free_btf_vmlinux() The function contains a single btf__free() call which can be inlined. Credits to Yonghong Song. Signed-off-by: Sahid Orentino Ferdjaoui Acked-by: Yonghong Song Suggested-by: Yonghong Song Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20221120112515.38165-6-sahid.ferdjaoui@industrialdiscipline.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/map.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index eb362bd3d2c9..88911d3aa2d9 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -811,11 +811,6 @@ static void free_map_kv_btf(struct btf *btf) btf__free(btf); } -static void free_btf_vmlinux(void) -{ - btf__free(btf_vmlinux); -} - static int map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, bool show_header) @@ -952,7 +947,7 @@ exit_close: close(fds[i]); exit_free: free(fds); - free_btf_vmlinux(); + btf__free(btf_vmlinux); return err; } -- cgit v1.2.3-58-ga151 From d9e8da558580513318a36f4ecb1b6e28e41b4de0 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 17 Nov 2022 17:21:01 +0100 Subject: NFC: nci: Extend virtual NCI deinit test Extend the test to check the scenario when NCI core tries to send data to already closed device to ensure that nothing bad happens. Signed-off-by: Dmitry Vyukov Cc: Bongsu Jeon Cc: Krzysztof Kozlowski Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: David S. 
Miller --- tools/testing/selftests/nci/nci_dev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 162c41e9bcae..1562aa7d60b0 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -888,6 +888,17 @@ TEST_F(NCI, deinit) &msg); ASSERT_EQ(rc, 0); EXPECT_EQ(get_dev_enable_state(&msg), 0); + + /* Test that operations that normally send packets to the driver + * don't cause issues when the device is already closed. + * Note: the send of NFC_CMD_DEV_UP itself still succeeds it's just + * that the device won't actually be up. + */ + close(self->virtual_nci_fd); + self->virtual_nci_fd = -1; + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_UP, self->dev_idex); + EXPECT_EQ(rc, 0); } TEST_HARNESS_MAIN -- cgit v1.2.3-58-ga151 From 2a42461a88314bbeaa3dcad3d19a4bb3d9aa546f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 21 Nov 2022 15:34:39 +0800 Subject: selftests/bpf: Add cgroup helper remove_cgroup() Add remove_cgroup() to remove a cgroup which doesn't have any children or live processes. It will be used by the following patch to test cgroup iterator on a dead cgroup. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221121073440.1828292-3-houtao@huaweicloud.com --- tools/testing/selftests/bpf/cgroup_helpers.c | 19 +++++++++++++++++++ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index dd1aa5afcf5a..9e95b37a7dff 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -333,6 +333,25 @@ int get_root_cgroup(void) return fd; } +/* + * remove_cgroup() - Remove a cgroup + * @relative_path: The cgroup path, relative to the workdir, to remove + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup(const char *relative_path) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. 
%s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 3358734356ab..f099a166c94d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -18,6 +18,7 @@ int write_cgroup_file_parent(const char *relative_path, const char *file, int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); +void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); int join_cgroup(const char *relative_path); -- cgit v1.2.3-58-ga151 From 8589e92675aa4727bede3f9230709624619844f3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 21 Nov 2022 15:34:40 +0800 Subject: selftests/bpf: Add test for cgroup iterator on a dead cgroup The test closes both iterator link fd and cgroup fd, and removes the cgroup file to make a dead cgroup before reading from cgroup iterator. It also uses kern_sync_rcu() and usleep() to wait for the release of start cgroup. If the start cgroup is not pinned by cgroup iterator, reading from iterator fd will trigger use-after-free. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20221121073440.1828292-4-houtao@huaweicloud.com --- .../testing/selftests/bpf/prog_tests/cgroup_iter.c | 76 ++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c index c4a2adb38da1..e02feb5fae97 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -189,6 +189,80 @@ static void test_walk_self_only(struct cgroup_iter *skel) BPF_CGROUP_ITER_SELF_ONLY, "self_only"); } +static void test_walk_dead_self_only(struct cgroup_iter *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + char expected_output[128], buf[128]; + const char *cgrp_name = "/dead"; + union bpf_iter_link_info linfo; + int len, cgrp_fd, iter_fd; + struct bpf_link *link; + size_t left; + char *p; + + cgrp_fd = create_and_get_cgroup(cgrp_name); + if (!ASSERT_GE(cgrp_fd, 0, "create cgrp")) + return; + + /* The cgroup will be dead during read() iteration, so it only has + * epilogue in the output + */ + snprintf(expected_output, sizeof(expected_output), EPILOGUE); + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgrp_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto close_cgrp; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto free_link; + + /* Close link fd and cgroup fd */ + bpf_link__destroy(link); + close(cgrp_fd); + + /* Remove cgroup to mark it as dead */ + remove_cgroup(cgrp_name); + + /* Two kern_sync_rcu() and usleep() pairs are used to wait for the + * releases of cgroup css, and the last kern_sync_rcu() and usleep() + * pair is used to wait for the free of cgroup itself. 
+ */ + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(1000); + + memset(buf, 0, sizeof(buf)); + left = ARRAY_SIZE(buf); + p = buf; + while ((len = read(iter_fd, p, left)) > 0) { + p += len; + left -= len; + } + + ASSERT_STREQ(buf, expected_output, "dead cgroup output"); + + /* read() after iter finishes should be ok. */ + if (len == 0) + ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read"); + + close(iter_fd); + return; +free_link: + bpf_link__destroy(link); +close_cgrp: + close(cgrp_fd); +} + void test_cgroup_iter(void) { struct cgroup_iter *skel = NULL; @@ -217,6 +291,8 @@ void test_cgroup_iter(void) test_early_termination(skel); if (test__start_subtest("cgroup_iter__self_only")) test_walk_self_only(skel); + if (test__start_subtest("cgroup_iter__dead_self_only")) + test_walk_dead_self_only(skel); out: cgroup_iter__destroy(skel); cleanup_cgroups(); -- cgit v1.2.3-58-ga151 From 68f8e3d4b916531ea3bb8b83e35138cf78f2fce5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 21 Nov 2022 10:03:40 -0800 Subject: selftests/bpf: Make sure zero-len skbs aren't redirectable LWT_XMIT to test L3 case, TC to test L2 case. v2: - s/veth_ifindex/ipip_ifindex/ in two places (Martin) - add comment about which condition triggers the rejection (Martin) Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221121180340.1983627-2-sdf@google.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/empty_skb.c | 146 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/empty_skb.c | 37 ++++++ 2 files changed, 183 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/empty_skb.c create mode 100644 tools/testing/selftests/bpf/progs/empty_skb.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c new file mode 100644 index 000000000000..32dd731e9070 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "empty_skb.skel.h" + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +void test_empty_skb(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, tattr); + struct empty_skb *bpf_obj = NULL; + struct nstoken *tok = NULL; + struct bpf_program *prog; + char eth_hlen_pp[15]; + char eth_hlen[14]; + int veth_ifindex; + int ipip_ifindex; + int err; + int i; + + struct { + const char *msg; + const void *data_in; + __u32 data_size_in; + int *ifindex; + int err; + int ret; + bool success_on_tc; + } tests[] = { + /* Empty packets are always rejected. 
*/ + + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "veth empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &veth_ifindex, + .err = -EINVAL, + }, + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "ipip empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &ipip_ifindex, + .err = -EINVAL, + }, + + /* ETH_HLEN-sized packets: + * - can not be redirected at LWT_XMIT + * - can be redirected at TC to non-tunneling dest + */ + + { + /* __bpf_redirect_common */ + .msg = "veth ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &veth_ifindex, + .ret = -ERANGE, + .success_on_tc = true, + }, + { + /* __bpf_redirect_no_mac + * + * lwt: skb->len=0 <= skb_network_offset=0 + * tc: skb->len=14 <= skb_network_offset=14 + */ + .msg = "ipip ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &ipip_ifindex, + .ret = -ERANGE, + }, + + /* ETH_HLEN+1-sized packet should be redirected. */ + + { + .msg = "veth ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &veth_ifindex, + }, + { + .msg = "ipip ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &ipip_ifindex, + }, + }; + + SYS("ip netns add empty_skb"); + tok = open_netns("empty_skb"); + SYS("ip link add veth0 type veth peer veth1"); + SYS("ip link set dev veth0 up"); + SYS("ip link set dev veth1 up"); + SYS("ip addr add 10.0.0.1/8 dev veth0"); + SYS("ip addr add 10.0.0.2/8 dev veth1"); + veth_ifindex = if_nametoindex("veth0"); + + SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS("ip link set ipip0 up"); + SYS("ip addr add 192.168.1.1/16 dev ipip0"); + ipip_ifindex = if_nametoindex("ipip0"); + + bpf_obj = empty_skb__open_and_load(); + if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) + goto out; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + bpf_object__for_each_program(prog, bpf_obj->obj) { + char buf[128]; + bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + + tattr.data_in = tests[i].data_in; + tattr.data_size_in = tests[i].data_size_in; + + tattr.data_size_out = 0; + bpf_obj->bss->ifindex = *tests[i].ifindex; + bpf_obj->bss->ret = 0; + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr); + sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog)); + + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(err, 0, buf); + else + ASSERT_EQ(err, tests[i].err, buf); + sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog)); + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(bpf_obj->bss->ret, 0, buf); + else + ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + } + } + +out: + if (bpf_obj) + empty_skb__destroy(bpf_obj); + if (tok) + close_netns(tok); + system("ip netns del empty_skb"); +} diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c new file mode 100644 index 000000000000..4b0cd6753251 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/empty_skb.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +int ifindex; +int ret; + +SEC("lwt_xmit") +int redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("lwt_xmit") +int redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} + 
+SEC("tc") +int tc_redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("tc") +int tc_redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} -- cgit v1.2.3-58-ga151 From 837a3d66d698516ad2330e122eba9752ec3a48ed Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Sat, 19 Nov 2022 18:18:41 +0100 Subject: selftests: net: Add cross-compilation support for BPF programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The selftests/net does not have proper cross-compilation support, and does not properly state libbpf as a dependency. Mimic/copy the BPF build from selftests/bpf, which has the nice side-effect that libbpf is built as well. Signed-off-by: Björn Töpel Reviewed-by: Anders Roxell Link: https://lore.kernel.org/r/20221119171841.2014936-1-bjorn@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/bpf/Makefile | 45 +++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index 8ccaf8732eb2..a26cb94354f6 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -1,14 +1,51 @@ # SPDX-License-Identifier: GPL-2.0 CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../../lib CCINCLUDE += -I../../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + mkdir -p $@ TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o all: $(TEST_CUSTOM_PROGS) -$(OUTPUT)/%.o: %.c - $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@ +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - &1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - Date: Tue, 22 Nov 2022 08:50:04 -0800 Subject: Revert "selftests/bpf: Temporarily disable linked list tests" This reverts commit 0a2f85a1be4328d29aefa54684d10c23a3298fef. 
Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/linked_list.c | 21 +++++++-------------- tools/testing/selftests/bpf/progs/linked_list.c | 11 +---------- tools/testing/selftests/bpf/progs/linked_list.h | 2 -- .../testing/selftests/bpf/progs/linked_list_fail.c | 16 ++++++++-------- 4 files changed, 16 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 6170d36fe5fc..dd73d0a62c6e 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -24,9 +24,7 @@ static struct { { #test "_missing_lock_pop_back", \ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, TEST(kptr, 32) -/* FIXME TEST(global, 16) -*/ TEST(map, 0) TEST(inner_map, 0) #undef TEST @@ -34,6 +32,9 @@ static struct { { #test "_kptr_incorrect_lock_" #op, \ "held lock and object are not in the same allocation\n" \ "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ + { #test "_global_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ { #test "_map_incorrect_lock_" #op, \ "held lock and object are not in the same allocation\n" \ "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \ @@ -44,6 +45,10 @@ static struct { TEST(kptr, push_back) TEST(kptr, pop_front) TEST(kptr, pop_back) + TEST(global, push_front) + TEST(global, push_back) + TEST(global, pop_front) + TEST(global, pop_back) TEST(map, push_front) TEST(map, push_back) TEST(map, pop_front) @@ -53,14 +58,12 @@ static struct { TEST(inner_map, pop_front) TEST(inner_map, pop_back) #undef TEST -/* FIXME { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" }, { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" }, -*/ { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, @@ -75,7 +78,6 @@ static struct { { "direct_write_head", "direct access to bpf_list_head is disallowed" }, { "direct_read_node", "direct access to bpf_list_node is disallowed" }, { "direct_write_node", "direct access to bpf_list_node is disallowed" }, -/* FIXME { "write_after_push_front", "only read is supported" }, { "write_after_push_back", "only read is supported" }, { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, @@ -92,11 +94,8 @@ static struct { { "no_head_type", "bpf_list_head not found at offset=0" }, { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, -*/ { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, -/* FIXME { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, -*/ { "pop_front_off", "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" @@ -189,10 +188,8 @@ static void test_linked_list_success(int mode, bool leave_in_map) ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); ASSERT_OK(ret, "global_list_push_pop"); ASSERT_OK(opts.retval, "global_list_push_pop retval"); - /* FIXME: if (!leave_in_map) clear_fields(skel->maps.data_A); - */ if (mode == PUSH_POP) goto end; @@ -213,10 +210,8 @@ ppm: ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); ASSERT_OK(ret, "global_list_push_pop_multiple"); ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); - /* FIXME: if (!leave_in_map) clear_fields(skel->maps.data_A); - */ if (mode == PUSH_POP_MULT) goto end; @@ -237,10 +232,8 @@ lil: ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); ASSERT_OK(ret, "global_list_in_list"); ASSERT_OK(opts.retval, "global_list_in_list retval"); - /* FIXME: if (!leave_in_map) clear_fields(skel->maps.data_A); - */ end: linked_list__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index a99103c86e48..2c7b615c6d41 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -291,10 +291,7 @@ int inner_map_list_push_pop(void *ctx) SEC("tc") int global_list_push_pop(void *ctx) { - /* FIXME: - * return test_list_push_pop(&glock, &ghead); - */ - return 0; + return test_list_push_pop(&glock, &ghead); } SEC("tc") @@ -330,13 +327,10 @@ int global_list_push_pop_multiple(void *ctx) { int ret; - /* FIXME: ret = list_push_pop_multiple(&glock, &ghead, false); if (ret) return ret; return list_push_pop_multiple(&glock, &ghead, true); - */ - return 0; } SEC("tc") @@ -370,10 +364,7 @@ int inner_map_list_in_list(void *ctx) SEC("tc") int global_list_in_list(void *ctx) { - /* FIXME return test_list_in_list(&glock, &ghead); - */ - return 0; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h index 93157efc2d04..8db80ed64db1 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.h +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -47,12 +47,10 @@ struct { }, }; -/* FIXME #define private(name) SEC(".data." 
#name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_list_head ghead __contains(foo, node); private(B) struct bpf_spin_lock glock2; -*/ #endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 1b7ed1d3a9bb..1d9017240e19 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -59,12 +59,10 @@ CHECK(kptr, push_back, &f->head); CHECK(kptr, pop_front, &f->head); CHECK(kptr, pop_back, &f->head); -/* FIXME CHECK(global, push_front, &ghead); CHECK(global, push_back, &ghead); CHECK(global, pop_front, &ghead); CHECK(global, pop_back, &ghead); -*/ CHECK(map, push_front, &v->head); CHECK(map, push_back, &v->head); @@ -91,15 +89,23 @@ CHECK(inner_map, pop_back, &iv->head); #define CHECK_OP(op) \ CHECK(kptr_kptr, op, &f1->lock, &f2->head); \ + CHECK(kptr_global, op, &f1->lock, &ghead); \ CHECK(kptr_map, op, &f1->lock, &v->head); \ CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \ \ + CHECK(global_global, op, &glock2, &ghead); \ + CHECK(global_kptr, op, &glock, &f1->head); \ + CHECK(global_map, op, &glock, &v->head); \ + CHECK(global_inner_map, op, &glock, &iv->head); \ + \ CHECK(map_map, op, &v->lock, &v2->head); \ CHECK(map_kptr, op, &v->lock, &f2->head); \ + CHECK(map_global, op, &v->lock, &ghead); \ CHECK(map_inner_map, op, &v->lock, &iv->head); \ \ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \ CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \ + CHECK(inner_map_global, op, &iv->lock, &ghead); \ CHECK(inner_map_map, op, &iv->lock, &v->head); CHECK_OP(push_front); @@ -111,7 +117,6 @@ CHECK_OP(pop_back); #undef CHECK_OP #undef INIT -/* FIXME SEC("?kprobe/xyz") int map_compat_kprobe(void *ctx) { @@ -153,7 +158,6 @@ int map_compat_raw_tp_w(void *ctx) bpf_list_push_front(&ghead, NULL); return 0; } -*/ SEC("?tc") int obj_type_id_oor(void *ctx) @@ -299,7 +303,6 @@ int direct_write_node(void *ctx) return 0; } -/* FIXME static __always_inline int write_after_op(void (*push_op)(void *head, void *node)) { @@ -503,7 +506,6 @@ int incorrect_head_var_off2(struct __sk_buff *ctx) return 0; } -*/ SEC("?tc") int incorrect_head_off1(void *ctx) @@ -527,7 +529,6 @@ int incorrect_head_off1(void *ctx) return 0; } -/* FIXME SEC("?tc") int incorrect_head_off2(void *ctx) { @@ -544,7 +545,6 @@ int incorrect_head_off2(void *ctx) return 0; } -*/ static __always_inline int pop_ptr_off(void *(*op)(void *head)) -- cgit v1.2.3-58-ga151 From dc79f035b2062e4ff4f6432eda18f461f82b1333 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 22 Nov 2022 09:15:29 -0800 Subject: selftests/bpf: Workaround for llvm nop-4 bug Currently LLVM fails to recognize .data.* as data section and defaults to .text section. Later BPF backend tries to emit 4-byte NOP instruction which doesn't exist in BPF ISA and aborts. The fix for LLVM is pending: https://reviews.llvm.org/D138477 While waiting for the fix lets workaround the linked_list test case by using .bss.* prefix which is properly recognized by LLVM as BSS section. Fix libbpf to support .bss. prefix and adjust tests. 
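For illustration, a sketch (not part of the patch) of the net effect as seen from the selftests: a hidden global placed in a ".bss."-prefixed section is recognized by libbpf as BSS, so the generated skeleton exposes it as skel->maps.bss_A instead of skel->maps.data_A.

/* Sketch of the selftest's private() macro after this patch. A
 * ".bss."-prefixed section is SHT_NOBITS, which LLVM recognizes as
 * BSS instead of defaulting to .text and aborting on the 4-byte NOP.
 */
#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))

private(A) struct bpf_spin_lock glock;	/* collected into the bss_A map */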
Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 3 ++- tools/testing/selftests/bpf/prog_tests/linked_list.c | 6 +++--- tools/testing/selftests/bpf/progs/linked_list.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b5df6aca06ea..93ccea238391 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3511,7 +3511,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RELO; sec_desc->shdr = sh; sec_desc->data = data; - } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || + str_has_pfx(name, BSS_SEC "."))) { sec_desc->sec_type = SEC_BSS; sec_desc->shdr = sh; sec_desc->data = data; diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index dd73d0a62c6e..9a7d4c47af63 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -189,7 +189,7 @@ static void test_linked_list_success(int mode, bool leave_in_map) ASSERT_OK(ret, "global_list_push_pop"); ASSERT_OK(opts.retval, "global_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.data_A); + clear_fields(skel->maps.bss_A); if (mode == PUSH_POP) goto end; @@ -211,7 +211,7 @@ ppm: ASSERT_OK(ret, "global_list_push_pop_multiple"); ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.data_A); + clear_fields(skel->maps.bss_A); if (mode == PUSH_POP_MULT) goto end; @@ -233,7 +233,7 @@ lil: ASSERT_OK(ret, "global_list_in_list"); ASSERT_OK(opts.retval, "global_list_in_list retval"); if (!leave_in_map) - clear_fields(skel->maps.data_A); + clear_fields(skel->maps.bss_A); end: linked_list__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h index 8db80ed64db1..3fb2412552fc 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.h +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -47,7 +47,7 @@ struct { }, }; -#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_list_head ghead __contains(foo, node); -- cgit v1.2.3-58-ga151 From f583ddf15e57746e60f3b68d529afc9faa2e2cb3 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 21 Nov 2022 23:54:56 -0600 Subject: selftests/bpf: Add cgroup kfunc / kptr selftests This patch adds a selftest suite to validate the cgroup kfuncs that were added in the prior patch. 
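For orientation, here is a hedged sketch (program name and includes are illustrative; the kfunc prototypes match those declared in cgrp_kfunc_common.h below) of the reference-counting contract the suite validates: every acquired cgroup reference must be paired with a release, otherwise the verifier rejects the program with "Unreleased reference", as the failure cases check.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;

SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(acquire_release_sketch, struct cgroup *cgrp, const char *path)
{
	struct cgroup *acquired;

	/* cgrp is a trusted tracepoint argument, so acquiring it is legal. */
	acquired = bpf_cgroup_acquire(cgrp);
	/* Dropping this release would fail to load: "Unreleased reference". */
	bpf_cgroup_release(acquired);
	return 0;
}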
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221122055458.173143-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../testing/selftests/bpf/prog_tests/cgrp_kfunc.c | 174 ++++++++++++++ .../selftests/bpf/progs/cgrp_kfunc_common.h | 71 ++++++ .../selftests/bpf/progs/cgrp_kfunc_failure.c | 260 +++++++++++++++++++++ .../selftests/bpf/progs/cgrp_kfunc_success.c | 125 ++++++++++ 5 files changed, 631 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c create mode 100644 tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h create mode 100644 tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c create mode 100644 tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 12cf2159975e..b9a3d80204c6 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -10,6 +10,7 @@ bpf_nf # JIT does not support calling kernel f bpf_tcp_ca # JIT does not support calling kernel function (kfunc) cb_refs # expected error message unexpected error: -524 (trampoline) cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) +cgrp_kfunc # JIT does not support calling kernel function cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c new file mode 100644 index 000000000000..a59b166bbcc4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#define _GNU_SOURCE +#include +#include + +#include "cgrp_kfunc_failure.skel.h" +#include "cgrp_kfunc_success.skel.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void) +{ + struct cgrp_kfunc_success *skel; + int err; + + skel = cgrp_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = cgrp_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + cgrp_kfunc_success__destroy(skel); + return NULL; +} + +static int mkdir_rm_test_dir(void) +{ + int fd; + const char *cgrp_path = "cgrp_kfunc"; + + fd = create_and_get_cgroup(cgrp_path); + if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd")) + return -1; + + close(fd); + remove_cgroup(cgrp_path); + + return 0; +} + +static void run_success_test(const char *prog_name) +{ + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count"); + if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count"); + ASSERT_OK(skel->bss->err, "post_rmdir_err"); + +cleanup: + bpf_link__destroy(link); + cgrp_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_cgrp_acquire_release_argument", + "test_cgrp_acquire_leave_in_map", + "test_cgrp_xchg_release", + "test_cgrp_get_release", +}; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"}, + {"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"}, + {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"}, + {"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_acquire_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_get_null", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_xchg_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_get_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"}, + {"cgrp_kfunc_release_fp", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"cgrp_kfunc_release_unacquired", "release kernel function bpf_cgroup_release expects"}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct cgrp_kfunc_failure *skel; + int err, i; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = cgrp_kfunc_failure__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, 
"cgrp_kfunc_failure__open_opts")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + struct bpf_program *prog; + const char *curr_name = failure_tests[i].prog_name; + + prog = bpf_object__find_program_by_name(skel->obj, curr_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name)); + } + + err = cgrp_kfunc_failure__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + cgrp_kfunc_failure__destroy(skel); +} + +void test_cgrp_kfunc(void) +{ + int i, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "cgrp_env_setup")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } + +cleanup: + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h new file mode 100644 index 000000000000..3f18def0e45c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _CGRP_KFUNC_COMMON_H +#define _CGRP_KFUNC_COMMON_H + +#include +#include +#include +#include + +struct __cgrps_kfunc_map_value { + struct cgroup __kptr_ref * cgrp; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __cgrps_kfunc_map_value); + __uint(max_entries, 1); +} __cgrps_kfunc_map SEC(".maps"); + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; + +static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) +{ + s32 id; + long status; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); +} + +static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return status; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) { + bpf_map_delete_elem(&__cgrps_kfunc_map, &id); + return -ENOENT; + } + + acquired = bpf_cgroup_acquire(cgrp); + old = bpf_kptr_xchg(&v->cgrp, acquired); + if (old) { + bpf_cgroup_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _CGRP_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c new file mode 100644 index 000000000000..a1369b5ebcf8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -0,0 +1,260 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp) +{ + int status; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + return NULL; + + return cgrps_kfunc_map_value_lookup(cgrp); +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */ + acquired = bpf_cgroup_acquire(v->cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path; + + /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */ + acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("kretprobe/cgroup_destroy_locked") +int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) +{ + struct cgroup *acquired; + + /* Can't acquire an untrusted struct cgroup * pointer. */ + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */ + acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */ + acquired = bpf_cgroup_acquire(NULL); + if (!acquired) + return 0; + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Acquired cgroup is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */ + kptr = bpf_cgroup_kptr_get(&cgrp); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr, *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */ + kptr = bpf_cgroup_kptr_get(&acquired); + bpf_cgroup_release(acquired); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. 
*/ + kptr = bpf_cgroup_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */ + bpf_cgroup_release(v->cgrp); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired = (struct cgroup *)&path; + + /* Cannot release random frame pointer. */ + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return 0; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) + return -ENOENT; + + acquired = bpf_cgroup_acquire(cgrp); + + old = bpf_kptr_xchg(&v->cgrp, acquired); + + /* old cannot be passed to bpf_cgroup_release() without a NULL check. */ + bpf_cgroup_release(old); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path) +{ + /* Cannot release trusted cgroup pointer which was not acquired. */ + bpf_cgroup_release(cgrp); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c new file mode 100644 index 000000000000..9f4569f7598b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid, invocations; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + bool same = pid == cur_pid; + + if (same) + __sync_fetch_and_add(&invocations, 1); + + return same; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + if (!is_test_kfunc_task()) + return 0; + + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} -- cgit v1.2.3-58-ga151 From 227a89cf504188759fd3d8933a3a06fc60cca7f2 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 21 Nov 2022 23:54:58 -0600 Subject: selftests/bpf: Add selftests for bpf_cgroup_ancestor() kfunc bpf_cgroup_ancestor() allows BPF programs to access the ancestor of a struct cgroup *. This patch adds selftests that validate its expected behavior. 
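As a hedged illustration (program name is made up; prototypes are those added to cgrp_kfunc_common.h below), the semantics being validated are: asking for the cgroup's own level returns a referenced pointer to the cgroup itself, which must be released, while out-of-range levels return NULL.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;

SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(ancestor_sketch, struct cgroup *cgrp, const char *path)
{
	struct cgroup *self, *invalid;

	/* The cgroup's own level yields the cgroup itself, as a reference. */
	self = bpf_cgroup_ancestor(cgrp, cgrp->level);
	if (!self)
		return 0;
	bpf_cgroup_release(self);

	/* Out-of-range levels are expected to return NULL. */
	invalid = bpf_cgroup_ancestor(cgrp, -1);
	if (invalid)
		bpf_cgroup_release(invalid);
	return 0;
}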
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221122055458.173143-5-void@manifault.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/cgrp_kfunc.c | 1 + .../selftests/bpf/progs/cgrp_kfunc_common.h | 1 + .../selftests/bpf/progs/cgrp_kfunc_success.c | 45 ++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index a59b166bbcc4..973f0c5af965 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -86,6 +86,7 @@ static const char * const success_tests[] = { "test_cgrp_acquire_leave_in_map", "test_cgrp_xchg_release", "test_cgrp_get_release", + "test_cgrp_get_ancestors", }; static struct { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index 3f18def0e45c..7d30855bfe78 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -23,6 +23,7 @@ struct hash_map { struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; void bpf_cgroup_release(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 9f4569f7598b..0c23ea32df9f 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -123,3 +123,48 @@ int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path) return 0; } + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) +{ + struct cgroup *self, *ancestor1, *invalid; + + if (!is_test_kfunc_task()) + return 0; + + self = bpf_cgroup_ancestor(cgrp, cgrp->level); + if (!self) { + err = 1; + return 0; + } + + if (self->self.id != cgrp->self.id) { + bpf_cgroup_release(self); + err = 2; + return 0; + } + bpf_cgroup_release(self); + + ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!ancestor1) { + err = 3; + return 0; + } + bpf_cgroup_release(ancestor1); + + invalid = bpf_cgroup_ancestor(cgrp, 10000); + if (invalid) { + bpf_cgroup_release(invalid); + err = 4; + return 0; + } + + invalid = bpf_cgroup_ancestor(cgrp, -1); + if (invalid) { + bpf_cgroup_release(invalid); + err = 5; + return 0; + } + + return 0; +} -- cgit v1.2.3-58-ga151 From 8ac88eece8009428e2577c345080a458e4507e2f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 23 Nov 2022 12:08:29 -0800 Subject: selftests/bpf: Mount debugfs in setns_by_fd Jiri reports broken test_progs after recent commit 68f8e3d4b916 ("selftests/bpf: Make sure zero-len skbs aren't redirectable"). Apparently we don't remount debugfs when we switch back networking namespace. Let's explicitly mount /sys/kernel/debug. 
0: https://lore.kernel.org/bpf/63b85917-a2ea-8e35-620c-808560910819@meta.com/T/#ma66ca9c92e99eee0a25e40f422489b26ee0171c1 Fixes: a30338840fa5 ("selftests/bpf: Move open_netns() and close_netns() into network_helpers.c") Reported-by: Jiri Olsa Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221123200829.2226254-1-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.c | 4 ++++ tools/testing/selftests/bpf/prog_tests/empty_skb.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index bec15558fd93..1f37adff7632 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -426,6 +426,10 @@ static int setns_by_fd(int nsfd) if (!ASSERT_OK(err, "mount /sys/fs/bpf")) return err; + err = mount("debugfs", "/sys/kernel/debug", "debugfs", 0, NULL); + if (!ASSERT_OK(err, "mount /sys/kernel/debug")) + return err; + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 32dd731e9070..0613f3bb8b5e 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -9,7 +9,7 @@ goto out; \ }) -void test_empty_skb(void) +void serial_test_empty_skb(void) { LIBBPF_OPTS(bpf_test_run_opts, tattr); struct empty_skb *bpf_obj = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index a50971c6cf4a..9ac6f6a268db 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -85,7 +85,7 @@ static void test_max_pkt_size(int fd) } #define NUM_PKTS 10000 -void test_xdp_do_redirect(void) +void serial_test_xdp_do_redirect(void) { int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst; char data[sizeof(pkt_udp) + sizeof(__u32)]; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index c72083885b6d..13daa3746064 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -174,7 +174,7 @@ out: system("ip netns del synproxy"); } -void test_xdp_synproxy(void) +void serial_test_xdp_synproxy(void) { if (test__start_subtest("xdp")) test_synproxy(true); -- cgit v1.2.3-58-ga151 From 8e898aaa733eca61393fc036c8a4b5834fee5dd3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 22 Nov 2022 19:54:21 -0800 Subject: selftests/bpf: Add reproducer for decl_tag in func_proto argument It should trigger a WARN_ON_ONCE in btf_type_id_size: RIP: 0010:btf_type_id_size+0x8bd/0x940 kernel/bpf/btf.c:1952 btf_func_proto_check kernel/bpf/btf.c:4506 [inline] btf_check_all_types kernel/bpf/btf.c:4734 [inline] btf_parse_type_sec+0x1175/0x1980 kernel/bpf/btf.c:4763 btf_parse kernel/bpf/btf.c:5042 [inline] btf_new_fd+0x65a/0xb00 kernel/bpf/btf.c:6709 bpf_btf_load+0x6f/0x90 kernel/bpf/syscall.c:4342 __sys_bpf+0x50a/0x6c0 kernel/bpf/syscall.c:5034 __do_sys_bpf kernel/bpf/syscall.c:5093 [inline] __se_sys_bpf kernel/bpf/syscall.c:5091 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5091 do_syscall_64+0x54/0x70 arch/x86/entry/common.c:48 
Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221123035422.872531-1-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 95a2b80f0d17..de1b5b9eb93a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3948,6 +3948,20 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Invalid return type", }, +{ + .descr = "decl_tag test #17, func proto, argument", + .raw_types = { + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag1\0var"), + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, { .descr = "type_tag test #1", .raw_types = { -- cgit v1.2.3-58-ga151 From 72b43bde38de4aa05e6a7fa12d7965f48180deb6 Mon Sep 17 00:00:00 2001 From: Ji Rongfeng Date: Fri, 18 Nov 2022 16:18:18 +0800 Subject: bpf: Update bpf_{g,s}etsockopt() documentation * append missing optnames to the end * simplify bpf_getsockopt()'s doc Signed-off-by: Ji Rongfeng Link: https://lore.kernel.org/r/DU0P192MB15479B86200B1216EC90E162D6099@DU0P192MB1547.EURP192.PROD.OUTLOOK.COM Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 21 ++++++++++++--------- tools/include/uapi/linux/bpf.h | 23 +++++++++++++---------- 2 files changed, 25 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ab86145df760..f89de51a45db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2584,14 +2584,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2808,12 +2813,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. 
+ * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6580448e9f77..f89de51a45db 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2584,14 +2584,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2647,7 +2652,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -2808,12 +2813,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3-58-ga151 From f471748b7fe5ab7ec6de4cbadffabfa7bb5b6240 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 22 Nov 2022 08:53:00 -0600 Subject: selftests/bpf: Add selftests for bpf_task_from_pid() Add some selftest testcases that validate the expected behavior of the bpf_task_from_pid() kfunc that was added in the prior patch. 
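A hedged sketch of the lookup contract under test (program name is illustrative; prototypes match task_kfunc_common.h below): a valid pid returns a referenced task_struct that must be released, an invalid pid returns NULL, and releasing the result without a NULL check is rejected by the verifier.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;

SEC("tp_btf/task_newtask")
int BPF_PROG(from_pid_sketch, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *p;

	/* A live pid yields a referenced task that must be released. */
	p = bpf_task_from_pid(task->pid);
	if (!p)
		return 0;
	bpf_task_release(p);

	/* Invalid pids (e.g. -1) yield NULL; calling bpf_task_release()
	 * without this check fails verification.
	 */
	p = bpf_task_from_pid(-1);
	if (p)
		bpf_task_release(p);
	return 0;
}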
Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221122145300.251210-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/task_kfunc.c | 4 ++ .../selftests/bpf/progs/task_kfunc_common.h | 1 + .../selftests/bpf/progs/task_kfunc_failure.c | 13 ++++ .../selftests/bpf/progs/task_kfunc_success.c | 73 ++++++++++++++++++++++ 4 files changed, 91 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index 4994fe6092cc..ffd8ef4303c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -78,6 +78,9 @@ static const char * const success_tests[] = { "test_task_xchg_release", "test_task_get_release", "test_task_current_acquire_release", + "test_task_from_pid_arg", + "test_task_from_pid_current", + "test_task_from_pid_invalid", }; static struct { @@ -99,6 +102,7 @@ static struct { {"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"}, {"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"}, + {"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, }; static void verify_fail(const char *prog_name, const char *expected_err_msg) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index 160d6dde00be..c0ffd171743e 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -23,6 +23,7 @@ struct hash_map { struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; void bpf_task_release(struct task_struct *p) __ksym; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym; static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p) { diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 93e934ddfcb6..e310473190d5 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -258,3 +258,16 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_ return 0; } + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(task->pid); + + /* Releasing bpf_task_from_pid() lookup without a NULL check. 
*/
+	bpf_task_release(acquired);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
index be4534b5ba2e..60c7ead41cfc 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -147,3 +147,76 @@ int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 cl
 	return 0;
 }
+
+static void lookup_compare_pid(const struct task_struct *p)
+{
+	struct task_struct *acquired;
+
+	acquired = bpf_task_from_pid(p->pid);
+	if (!acquired) {
+		err = 1;
+		return;
+	}
+
+	if (acquired->pid != p->pid)
+		err = 2;
+	bpf_task_release(acquired);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
+{
+	struct task_struct *acquired;
+
+	if (!is_test_kfunc_task())
+		return 0;
+
+	lookup_compare_pid(task);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags)
+{
+	struct task_struct *current, *acquired;
+
+	if (!is_test_kfunc_task())
+		return 0;
+
+	lookup_compare_pid(bpf_get_current_task_btf());
+	return 0;
+}
+
+static int is_pid_lookup_valid(s32 pid)
+{
+	struct task_struct *acquired;
+
+	acquired = bpf_task_from_pid(pid);
+	if (acquired) {
+		bpf_task_release(acquired);
+		return 1;
+	}
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags)
+{
+	struct task_struct *acquired;
+
+	if (!is_test_kfunc_task())
+		return 0;
+
+	if (is_pid_lookup_valid(-1)) {
+		err = 1;
+		return 0;
+	}
+
+	if (is_pid_lookup_valid(0xcafef00d)) {
+		err = 2;
+		return 0;
+	}
+
+	return 0;
+}
--
cgit v1.2.3-58-ga151


From d43eff0b85ae28dd2b95dab18a3e7dcb813d7bc1 Mon Sep 17 00:00:00 2001
From: Jonathan Toppins
Date: Tue, 22 Nov 2022 15:25:04 -0500
Subject: selftests: bonding: up/down delay w/ slave link flapping

Verify that when a bond is configured with {up,down}delay and the link
state of slave members flaps, if there are no remaining members up, the
bond should immediately select a member to bring up.
(from bonding.txt section 13.1 paragraph 4) Suggested-by: Liang Li Signed-off-by: Jonathan Toppins Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/bonding/Makefile | 4 +- .../selftests/drivers/net/bonding/lag_lib.sh | 106 +++++++++++++++++++++ .../drivers/net/bonding/mode-1-recovery-updelay.sh | 45 +++++++++ .../drivers/net/bonding/mode-2-recovery-updelay.sh | 45 +++++++++ .../testing/selftests/drivers/net/bonding/settings | 2 +- 5 files changed, 200 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh create mode 100755 tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 6b8d2e2f23c2..0f3921908b07 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -5,7 +5,9 @@ TEST_PROGS := \ bond-arp-interval-causes-panic.sh \ bond-break-lacpdu-tx.sh \ bond-lladdr-target.sh \ - dev_addr_lists.sh + dev_addr_lists.sh \ + mode-1-recovery-updelay.sh \ + mode-2-recovery-updelay.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 16c7fb858ac1..2a268b17b61f 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +NAMESPACES="" + # Test that a link aggregation device (bonding, team) removes the hardware # addresses that it adds on its underlying devices. test_LAG_cleanup() @@ -59,3 +61,107 @@ test_LAG_cleanup() log_test "$driver cleanup mode $mode" } + +# Build a generic 2 node net namespace with 2 connections +# between the namespaces +# +# +-----------+ +-----------+ +# | node1 | | node2 | +# | | | | +# | | | | +# | eth0 +-------+ eth0 | +# | | | | +# | eth1 +-------+ eth1 | +# | | | | +# +-----------+ +-----------+ +lag_setup2x2() +{ + local state=${1:-down} + local namespaces="lag_node1 lag_node2" + + # create namespaces + for n in ${namespaces}; do + ip netns add ${n} + done + + # wire up namespaces + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth0 + ip link set dev lag1-end netns lag_node2 $state name eth0 + + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth1 + ip link set dev lag1-end netns lag_node2 $state name eth1 + + NAMESPACES="${namespaces}" +} + +# cleanup all lag related namespaces and remove the bonding module +lag_cleanup() +{ + for n in ${NAMESPACES}; do + ip netns delete ${n} >/dev/null 2>&1 || true + done + modprobe -r bonding +} + +SWITCH="lag_node1" +CLIENT="lag_node2" +CLIENTIP="172.20.2.1" +SWITCHIP="172.20.2.2" + +lag_setup_network() +{ + lag_setup2x2 "down" + + # create switch + ip netns exec ${SWITCH} ip link add br0 up type bridge + ip netns exec ${SWITCH} ip link set eth0 master br0 up + ip netns exec ${SWITCH} ip link set eth1 master br0 up + ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0 +} + +lag_reset_network() +{ + ip netns exec ${CLIENT} ip link del bond0 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 up +} + +create_bond() +{ + # create client + ip netns exec ${CLIENT} ip link set eth0 down + ip netns exec ${CLIENT} ip link set eth1 down + + 
ip netns exec ${CLIENT} ip link add bond0 type bond $@ + ip netns exec ${CLIENT} ip link set eth0 master bond0 + ip netns exec ${CLIENT} ip link set eth1 master bond0 + ip netns exec ${CLIENT} ip link set bond0 up + ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0 +} + +test_bond_recovery() +{ + RET=0 + + create_bond $@ + + # verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + check_err $? "No connectivity" + + # force the links of the bond down + ip netns exec ${SWITCH} ip link set eth0 down + sleep 2 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 down + + # re-verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + + local rc=$? + check_err $rc "Bond failed to recover" + log_test "$1 ($2) bond recovery" + lag_reset_network +} diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh new file mode 100755 index 000000000000..ad4c845a4ac7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 1 miimon 100 updelay 0 +test_bond_recovery mode 1 miimon 100 updelay 200 +test_bond_recovery mode 1 miimon 100 updelay 500 +test_bond_recovery mode 1 miimon 100 updelay 1000 +test_bond_recovery mode 1 miimon 100 updelay 2000 +test_bond_recovery mode 1 miimon 100 updelay 5000 +test_bond_recovery mode 1 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh new file mode 100755 index 000000000000..2330d37453f9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. 
(from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 2 miimon 100 updelay 0 +test_bond_recovery mode 2 miimon 100 updelay 200 +test_bond_recovery mode 2 miimon 100 updelay 500 +test_bond_recovery mode 2 miimon 100 updelay 1000 +test_bond_recovery mode 2 miimon 100 updelay 2000 +test_bond_recovery mode 2 miimon 100 updelay 5000 +test_bond_recovery mode 2 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings index 867e118223cd..6091b45d226b 100644 --- a/tools/testing/selftests/drivers/net/bonding/settings +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -1 +1 @@ -timeout=60 +timeout=120 -- cgit v1.2.3-58-ga151 From 48671232fcb81b76be13c11b0df7089b16baea57 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 23 Nov 2022 21:32:22 -0800 Subject: selftests/bpf: Add tests for bpf_rcu_read_lock() Add a few positive/negative tests to test bpf_rcu_read_lock() and its corresponding verifier support. The new test will fail on s390x and aarch64, so an entry is added to each of their respective deny lists. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221124053222.2374650-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 + tools/testing/selftests/bpf/DENYLIST.s390x | 1 + .../selftests/bpf/prog_tests/rcu_read_lock.c | 158 +++++++++++ tools/testing/selftests/bpf/progs/rcu_read_lock.c | 290 +++++++++++++++++++++ 4 files changed, 450 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c create mode 100644 tools/testing/selftests/bpf/progs/rcu_read_lock.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index affc5aebbf0f..8e77515d56f6 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -45,6 +45,7 @@ modify_return # modify_return__attach failed module_attach # skel_attach skeleton attach failed: -524 mptcp/base # run_test mptcp unexpected error: -524 (errno 524) netcnt # packets unexpected packets: actual 10001 != expected 10000 +rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 recursion # skel_attach unexpected error: -524 (errno 524) ringbuf # skel_attach skeleton attachment failed: -1 setget_sockopt # attach_cgroup unexpected error: -524 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index b9a3d80204c6..648a8a1b6b78 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -43,6 +43,7 @@ module_attach # skel_attach skeleton attach failed: - mptcp netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) probe_user # check_kprobe_res wrong kprobe res from probe read (?) +rcu_read_lock # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) 
recursion # skel_attach unexpected error: -524 (trampoline) ringbuf # skel_load skeleton load failed (?) select_reuseport # intermittently fails on new s390x setup diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c new file mode 100644 index 000000000000..447d8560ecb6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "rcu_read_lock.skel.h" +#include "cgroup_helpers.h" + +static unsigned long long cgroup_id; + +static void test_success(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.get_cgroup_id, true); + bpf_program__set_autoload(skel->progs.task_succ, true); + bpf_program__set_autoload(skel->progs.no_lock, true); + bpf_program__set_autoload(skel->progs.two_regions, true); + bpf_program__set_autoload(skel->progs.non_sleepable_1, true); + bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val"); + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + rcu_read_lock__destroy(skel); +} + +static void test_rcuptr_acquire(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.task_acquire, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + ASSERT_OK(err, "skel_attach"); +out: + rcu_read_lock__destroy(skel); +} + +static const char * const inproper_region_tests[] = { + "miss_lock", + "miss_unlock", + "non_sleepable_rcu_mismatch", + "inproper_sleepable_helper", + "inproper_sleepable_kfunc", + "nested_rcu_region", +}; + +static void test_inproper_region(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +static const char * const rcuptr_misuse_tests[] = { + "task_untrusted_non_rcuptr", + "task_untrusted_rcuptr", + "cross_rcu_region", +}; + +static void test_rcuptr_misuse(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + 
ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +void test_rcu_read_lock(void) +{ + struct btf *vmlinux_btf; + int cgroup_fd; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { + test__skip(); + goto out; + } + + cgroup_fd = test__join_cgroup("/rcu_read_lock"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) + goto out; + + cgroup_id = get_cgroup_id("/rcu_read_lock"); + if (test__start_subtest("success")) + test_success(); + if (test__start_subtest("rcuptr_acquire")) + test_rcuptr_acquire(); + if (test__start_subtest("negative_tests_inproper_region")) + test_inproper_region(); + if (test__start_subtest("negative_tests_rcuptr_misuse")) + test_rcuptr_misuse(); + close(cgroup_fd); +out: + btf__free(vmlinux_btf); +} diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c new file mode 100644 index 000000000000..94a970076b98 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include +#include "bpf_tracing_net.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 user_data, key_serial, target_pid; +__u64 flags, task_storage_val, cgroup_id; + +struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +void bpf_key_put(struct bpf_key *key) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int get_cgroup_id(void *ctx) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* simulate bpf_get_current_cgroup_id() helper */ + bpf_rcu_read_lock(); + cgroup_id = task->cgroups->dfl_cgrp->kn->id; + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_succ(void *ctx) +{ + struct task_struct *task, *real_parent; + long init_val = 2; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + /* region including helper using rcu ptr real_parent */ + real_parent = task->real_parent; + ptr = bpf_task_storage_get(&map_a, real_parent, &init_val, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + goto out; + ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (!ptr) + goto out; + task_storage_val = *ptr; +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int no_lock(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* no bpf_rcu_read_lock(), old code still works */ + task = bpf_get_current_task_btf(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int two_regions(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* two regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; 
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_1(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_2(void *ctx) +{ + struct task_struct *task, *real_parent; + + bpf_rcu_read_lock(); + task = bpf_get_current_task_btf(); + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int task_acquire(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + /* acquire a reference which can be used outside rcu read lock region */ + real_parent = bpf_task_acquire(real_parent); + bpf_rcu_read_unlock(); + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_task_release(real_parent); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_lock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_lock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_unlock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_unlock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_rcu_mismatch(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + /* non-sleepable: missing bpf_rcu_read_unlock() in one path */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (real_parent) + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int inproper_sleepable_helper(void *ctx) +{ + struct task_struct *task, *real_parent; + struct pt_regs *regs; + __u32 value = 0; + void *ptr; + + task = bpf_get_current_task_btf(); + /* sleepable helper in rcu read lock region */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + regs = (struct pt_regs *)bpf_task_pt_regs(real_parent); + if (!regs) { + bpf_rcu_read_unlock(); + return 0; + } + + ptr = (void *)PT_REGS_IP(regs); + (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0); + user_data = value; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?lsm.s/bpf") +int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_key *bkey; + + /* sleepable kfunc in rcu read lock region */ + bpf_rcu_read_lock(); + bkey = bpf_lookup_user_key(key_serial, flags); + bpf_rcu_read_unlock(); + if (!bkey) + return -1; + bpf_key_put(bkey); + + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int nested_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* nested rcu read lock regions */ + task = 
bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_untrusted_non_rcuptr(void *ctx)
+{
+	struct task_struct *task, *last_wakee;
+
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	/* the pointer last_wakee marked as untrusted */
+	last_wakee = task->real_parent->last_wakee;
+	(void)bpf_task_storage_get(&map_a, last_wakee, 0, 0);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_untrusted_rcuptr(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	bpf_rcu_read_unlock();
+	/* helper use of rcu ptr outside the rcu read lock region */
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int cross_rcu_region(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* rcu ptr define/use in different regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_lock();
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+	bpf_rcu_read_unlock();
+	return 0;
+}
--
cgit v1.2.3-58-ga151


From ca7ae89160434cd045a4795a235eb16587bd8f73 Mon Sep 17 00:00:00 2001
From: Dmytro Shytyi
Date: Fri, 25 Nov 2022 23:29:54 +0100
Subject: selftests: mptcp: mptfo Initiator/Listener

This patch first adds TFO support in mptcp_connect.c. This can be
enabled via a new option: -o MPTFO. Once enabled, the TCP_FASTOPEN
socket option is enabled for the server side, and a sendto() with
MSG_FASTOPEN is used instead of a connect() for the client side.

Note that the first SYN can carry only a limited number of bytes; the
kernel is allowed to send less data than the caller provided. We
therefore need to track more status information so that the next
sendmsg() can resume from the first unsent byte and send the rest.

Also, in TFO scenarios we need to completely spool the partially
transmitted buffer -- and account for it -- before starting the
sendfile/mmap xmit, otherwise the relevant tests will fail.
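The initiator-side pattern boils down to the sketch below (illustrative only,
with address setup and error handling omitted; the real implementation lives
in sock_connect_mptcp() in the diff that follows):

	/* The first write doubles as the connection attempt. The kernel
	 * may accept fewer bytes than requested, so remember the offset
	 * and push the remainder with regular writes once connected.
	 */
	static ssize_t tfo_first_write(int sock, const struct sockaddr *addr,
				       socklen_t alen, const char *buf, size_t len)
	{
		return sendto(sock, buf, len, MSG_FASTOPEN, addr, alen);
	}

On the listener side, enabling the feature is just setsockopt(fd, IPPROTO_TCP,
TCP_FASTOPEN, &qlen, sizeof(qlen)), as done by set_mptfo() below.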
Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Dmytro Shytyi Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 171 ++++++++++++++++----- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 21 +++ 2 files changed, 150 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index e54653ea2ed4..8a8266957bc5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -83,6 +83,7 @@ struct cfg_cmsg_types { struct cfg_sockopt_types { unsigned int transparent:1; + unsigned int mptfo:1; }; struct tcp_inq_state { @@ -90,6 +91,13 @@ struct tcp_inq_state { bool expect_eof; }; +struct wstate { + char buf[8192]; + unsigned int len; + unsigned int off; + unsigned int total_len; +}; + static struct tcp_inq_state tcp_inq; static struct cfg_cmsg_types cfg_cmsg_types; @@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf) } } +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) + perror("TCP_FASTOPEN"); +} + static int do_ulp_so(int sock, const char *name) { return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); @@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr, if (cfg_sockopt_types.transparent) set_transparent(sock, pf); + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr, static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto, - struct addrinfo **peer) + struct addrinfo **peer, + int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + int syn_copied = 0; int sock = -1; hints.ai_family = pf; @@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr, if (cfg_mark) set_mark(sock, cfg_mark); - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { - *peer = a; - break; /* success */ + if (cfg_sockopt_types.mptfo) { + if (!winfo->total_len) + winfo->total_len = winfo->len = read(infd, winfo->buf, + sizeof(winfo->buf)); + + syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied >= 0) { + winfo->off = syn_copied; + winfo->len -= syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + perror("connect()"); + close(sock); + sock = -1; } - - perror("connect()"); - close(sock); - sock = -1; } freeaddrinfo(addr); @@ -571,14 +612,14 @@ static void shut_wr(int fd) shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; - char wbuf[8192]; + unsigned int total_wlen = 0, total_rlen = 0; set_nonblock(peerfd, true); @@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int 
peerfd, int outfd, bool *in_closed_after } if (fds.revents & POLLOUT) { - if (wlen == 0) { - woff = 0; - wlen = read(infd, wbuf, sizeof(wbuf)); + if (winfo->len == 0) { + winfo->off = 0; + winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } - if (wlen > 0) { + if (winfo->len > 0) { ssize_t bw; /* limit the total amount of written data to the trunc value */ - if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) - wlen = cfg_truncate - total_wlen; + if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) + winfo->len = cfg_truncate - total_wlen; - bw = do_rnd_write(peerfd, wbuf + woff, wlen); + bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { if (cfg_rcv_trunc) return 0; @@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after return 111; } - woff += bw; - wlen -= bw; + winfo->off += bw; + winfo->len -= bw; total_wlen += bw; - } else if (wlen == 0) { + } else if (winfo->len == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; @@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int spool_buf(int fd, struct wstate *winfo) +{ + while (winfo->len) { + int ret = write(fd, winfo->buf + winfo->off, winfo->len); + + if (ret < 0) { + perror("write"); + return 4; + } + winfo->off += ret; + winfo->len -= ret; + } + return 0; +} + +static int do_mmap(int infd, int outfd, unsigned int size, + struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); - ssize_t ret = 0, off = 0; + ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { @@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) return 1; } - rem = size; + ret = spool_buf(outfd, winfo); + if (ret < 0) + return ret; + + rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); @@ -772,8 +833,16 @@ static int get_infd_size(int fd) return (int)count; } -static int do_sendfile(int infd, int outfd, unsigned int count) +static int do_sendfile(int infd, int outfd, unsigned int count, + struct wstate *winfo) { + int ret = spool_buf(outfd, winfo); + + if (ret < 0) + return ret; + + count -= winfo->total_len; + while (count > 0) { ssize_t r; @@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, + struct wstate *winfo) { int err; @@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); if (err) return err; @@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; @@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); } else { - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; @@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, 
int outfd, return err; } -static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; struct timespec start, end; @@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) switch (cfg_mode) { case CFG_MODE_POLL: - ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, + winfo); break; case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; default: @@ -999,6 +1072,7 @@ static void maybe_close(int fd) int main_loop_s(int listensock) { struct sockaddr_storage ss; + struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; @@ -1033,7 +1107,8 @@ again: SOCK_TEST_TCPULP(remotesock, 0); - copyfd_io(fd, remotesock, 1, true); + memset(&winfo, 0, sizeof(winfo)); + copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; @@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name) return; } + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); exit(1); } @@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen) int main_loop(void) { - int fd, ret, fd_in = 0; + int fd = 0, ret, fd_in = 0; struct addrinfo *peer; + struct wstate winfo; + + if (cfg_input && cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } - /* listener is ready. 
*/ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); + memset(&winfo, 0, sizeof(winfo)); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; @@ -1186,14 +1273,13 @@ again: if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); - if (cfg_input) { + if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s:%d", cfg_input, errno); } - /* close the client socket open only if we are not going to reconnect */ - ret = copyfd_io(fd_in, fd, 1, 0); + ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) return ret; @@ -1210,6 +1296,7 @@ again: xerror("can't reconnect: %d", errno); if (cfg_input) close(fd_in); + memset(&winfo, 0, sizeof(winfo)); goto again; } else { close(fd); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 621af6895f4d..60198b91a530 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -762,6 +762,23 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +run_tests_mptfo() +{ + echo "INFO: with MPTFO start" + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 + + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 + echo "INFO: with MPTFO end" +} + run_tests_disconnect() { local peekmode="$1" @@ -901,6 +918,10 @@ run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" stop_if_error "Tests with peek mode have failed" +# MPTFO (MultiPath TCP Fatopen tests) +run_tests_mptfo +stop_if_error "Tests with MPTFO have failed" + # connect to ns4 ip address, ns2 should intercept/proxy run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent dead:beef:3::1 "tproxy ipv6" -- cgit v1.2.3-58-ga151 From 91a7de85600d5dfa272cea3cef83052e067dc0ab Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 28 Nov 2022 09:02:10 -0500 Subject: selftests/net: add csum offload test Test NIC hardware checksum offload: - Rx + Tx - IPv4 + IPv6 - TCP + UDP Optional features: - zero checksum 0xFFFF - checksum disable 0x0000 - transport encap headers - randomization See file header for detailed comments. 
Expected results differ depending on NIC features: - CHECKSUM_UNNECESSARY vs CHECKSUM_COMPLETE - NETIF_F_HW_CSUM (csum_start/csum_off) vs NETIF_F_IP(V6)_CSUM Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20221128140210.553391-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/csum.c | 986 +++++++++++++++++++++++++++++++++ 3 files changed, 988 insertions(+) create mode 100644 tools/testing/selftests/net/csum.c (limited to 'tools') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ff8807cc9c2e..ee38ca888244 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +csum cmsg_sender fin_ack_lat gro diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 880e6ded6ed5..3007e98a6d64 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -74,6 +74,7 @@ TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello +TEST_GEN_FILES += csum TEST_FILES := settings diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c new file mode 100644 index 000000000000..82a1c1839da6 --- /dev/null +++ b/tools/testing/selftests/net/csum.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP. + * + * The test runs on two machines to exercise the NIC. For this reason it + * is not integrated in kselftests. + * + * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS)) + * + * Rx: + * + * The sender sends packets with a known checksum field using PF_INET(6) + * SOCK_RAW sockets. + * + * good packet: $CMD [-t] + * bad packet: $CMD [-t] -E + * + * The receiver reads UDP packets with a UDP socket. This is not an + * option for TCP packets ('-t'). Optionally insert an iptables filter + * to avoid these entering the real protocol stack. + * + * The receiver also reads all packets with a PF_PACKET socket, to + * observe whether both good and bad packets arrive on the host. And to + * read the optional TP_STATUS_CSUM_VALID bit. This requires setting + * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY. + * + * Tx: + * + * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx + * checksum offload. + * + * The sender can sends packets with a UDP socket. + * + * Optionally crafts a packet that sums up to zero to verify that the + * device writes negative zero 0xFFFF in this case to distinguish from + * 0x0000 (checksum disabled), as required by RFC 768. Hit this case + * by choosing a specific source port. + * + * good packet: $CMD -U + * zero csum: $CMD -U -Z + * + * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR, + * to cover more protocols. PF_PACKET requires passing src and dst mac + * addresses. + * + * good packet: $CMD -s $smac -d $dmac -p [-t] + * + * Argument '-z' sends UDP packets with a 0x000 checksum disabled field, + * to verify that the NIC passes these packets unmodified. + * + * Argument '-e' adds a transport mode encapsulation header between + * network and transport header. This will fail for devices that parse + * headers. 
Should work on devices that implement protocol agnostic tx + * checksum offload (NETIF_F_HW_CSUM). + * + * Argument '-r $SEED' optionally randomizes header, payload and length + * to increase coverage between packets sent. SEED 1 further chooses a + * different seed for each run (and logs this for reproducibility). It + * is advised to enable this for extra coverage in continuous testing. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool cfg_bad_csum; +static int cfg_family = PF_INET6; +static int cfg_num_pkt = 4; +static bool cfg_do_rx = true; +static bool cfg_do_tx = true; +static bool cfg_encap; +static char *cfg_ifname = "eth0"; +static char *cfg_mac_dst; +static char *cfg_mac_src; +static int cfg_proto = IPPROTO_UDP; +static int cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static uint16_t cfg_port_dst = 34000; +static uint16_t cfg_port_src = 33000; +static uint16_t cfg_port_src_encap = 33001; +static unsigned int cfg_random_seed; +static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */ +static bool cfg_send_pfpacket; +static bool cfg_send_udp; +static int cfg_timeout_ms = 2000; +static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */ +static bool cfg_zero_sum; /* create packet that adds up to zero */ + +static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6}; +static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6}; + +#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr)) +#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr)) +#define MAX_PAYLOAD_LEN 1024 + +/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */ +struct udp_encap_hdr { + uint8_t nexthdr; + uint8_t padding[3]; +}; + +/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */ +static void *iph_addr_p; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL); +} + +static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum) +{ + uint16_t *words = (uint16_t *)data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + + if (len & 1) + sum += ((unsigned char *)data)[len - 1]; + + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t checksum(void *th, uint16_t proto, size_t len) +{ + uint32_t sum; + int alen; + + alen = cfg_family == PF_INET6 ? 
32 : 8; + + sum = checksum_nofold(iph_addr_p, alen, 0); + sum += htons(proto); + sum += htons(len); + + /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */ + if (cfg_do_tx && cfg_send_pfpacket) + return ~checksum_fold(NULL, 0, sum); + else + return checksum_fold(th, len, sum); +} + +static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len) +{ + struct iphdr *iph = _iph; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = proto; + iph->saddr = cfg_saddr4.sin_addr.s_addr; + iph->daddr = cfg_daddr4.sin_addr.s_addr; + iph->tot_len = htons(sizeof(*iph) + len); + iph->check = checksum_fold(iph, sizeof(*iph), 0); + + iph_addr_p = &iph->saddr; + + return iph + 1; +} + +static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len) +{ + struct ipv6hdr *ip6h = _ip6h; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 64; + ip6h->saddr = cfg_saddr6.sin6_addr; + ip6h->daddr = cfg_daddr6.sin6_addr; + + iph_addr_p = &ip6h->saddr; + + return ip6h + 1; +} + +static void *build_packet_udp(void *_uh) +{ + struct udphdr *uh = _uh; + + uh->source = htons(cfg_port_src); + uh->dest = htons(cfg_port_dst); + uh->len = htons(sizeof(*uh) + cfg_payload_len); + uh->check = 0; + + /* choose source port so that uh->check adds up to zero */ + if (cfg_zero_sum) { + uh->source = 0; + uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + fprintf(stderr, "tx: changing sport: %hu -> %hu\n", + cfg_port_src, ntohs(uh->source)); + cfg_port_src = ntohs(uh->source); + } + + if (cfg_zero_disable) + uh->check = 0; + else + uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + if (cfg_bad_csum) + uh->check = ~uh->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check); + return uh + 1; +} + +static void *build_packet_tcp(void *_th) +{ + struct tcphdr *th = _th; + + th->source = htons(cfg_port_src); + th->dest = htons(cfg_port_dst); + th->doff = 5; + th->check = 0; + + th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len); + + if (cfg_bad_csum) + th->check = ~th->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check); + return th + 1; +} + +static char *build_packet_udp_encap(void *_uh) +{ + struct udphdr *uh = _uh; + struct udp_encap_hdr *eh = _uh + sizeof(*uh); + + /* outer dst == inner dst, to simplify BPF filter + * outer src != inner src, to demultiplex on recv + */ + uh->dest = htons(cfg_port_dst); + uh->source = htons(cfg_port_src_encap); + uh->check = 0; + uh->len = htons(sizeof(*uh) + + sizeof(*eh) + + sizeof(struct tcphdr) + + cfg_payload_len); + + eh->nexthdr = IPPROTO_TCP; + + return build_packet_tcp(eh + 1); +} + +static char *build_packet(char *buf, int max_len, int *len) +{ + uint8_t proto; + char *off; + int tlen; + + if (cfg_random_seed) { + int *buf32 = (void *)buf; + int i; + + for (i = 0; i < (max_len / sizeof(int)); i++) + buf32[i] = rand(); + } else { + memset(buf, cfg_payload_char, max_len); + } + + if (cfg_proto == IPPROTO_UDP) + tlen = sizeof(struct udphdr) + cfg_payload_len; + else + tlen = sizeof(struct tcphdr) + cfg_payload_len; + + if (cfg_encap) { + proto = IPPROTO_UDP; + tlen += ENC_HEADER_LEN; + } else { + proto = cfg_proto; + } + + if (cfg_family == PF_INET) + off = build_packet_ipv4(buf, proto, tlen); + else + off = build_packet_ipv6(buf, proto, tlen); + + if (cfg_encap) + off = build_packet_udp_encap(off); + else if (cfg_proto == 
IPPROTO_UDP) + off = build_packet_udp(off); + else + off = build_packet_tcp(off); + + /* only pass the payload, but still compute headers for cfg_zero_sum */ + if (cfg_send_udp) { + *len = cfg_payload_len; + return off; + } + + *len = off - buf + cfg_payload_len; + return buf; +} + +static int open_inet(int ipproto, int protocol) +{ + int fd; + + fd = socket(cfg_family, ipproto, protocol); + if (fd == -1) + error(1, errno, "socket inet"); + + if (cfg_family == PF_INET6) { + /* may have been updated by cfg_zero_sum */ + cfg_saddr6.sin6_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6))) + error(1, errno, "bind dgram 6"); + if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "connect dgram 6"); + } else { + /* may have been updated by cfg_zero_sum */ + cfg_saddr4.sin_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4))) + error(1, errno, "bind dgram 4"); + if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "connect dgram 4"); + } + + return fd; +} + +static int open_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket packet"); + + if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt packet_vnet_ndr"); + + return fd; +} + +static void send_inet(int fd, const char *buf, int len) +{ + int ret; + + ret = write(fd, buf, len); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %d", ret); +} + +static void eth_str_to_addr(const char *str, unsigned char *eth) +{ + if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ð[0], ð[1], ð[2], ð[3], ð[4], ð[5]) != 6) + error(1, 0, "cannot parse mac addr %s", str); +} + +static void send_packet(int fd, const char *buf, int len) +{ + struct virtio_net_hdr vh = {0}; + struct sockaddr_ll addr = {0}; + struct msghdr msg = {0}; + struct ethhdr eth; + struct iovec iov[3]; + int ret; + + addr.sll_family = AF_PACKET; + addr.sll_halen = ETH_ALEN; + addr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!addr.sll_ifindex) + error(1, errno, "if_nametoindex %s", cfg_ifname); + + vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (cfg_family == PF_INET6) { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + addr.sll_protocol = htons(ETH_P_IPV6); + } else { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr); + addr.sll_protocol = htons(ETH_P_IP); + } + + if (cfg_encap) + vh.csum_start += ENC_HEADER_LEN; + + if (cfg_proto == IPPROTO_TCP) { + vh.csum_offset = __builtin_offsetof(struct tcphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct tcphdr); + } else { + vh.csum_offset = __builtin_offsetof(struct udphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct udphdr); + } + + eth_str_to_addr(cfg_mac_src, eth.h_source); + eth_str_to_addr(cfg_mac_dst, eth.h_dest); + eth.h_proto = addr.sll_protocol; + + iov[0].iov_base = &vh; + iov[0].iov_len = sizeof(vh); + + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + + iov[2].iov_base = (void *)buf; + iov[2].iov_len = len; + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "sendmsg packet"); + if (ret != sizeof(vh) + sizeof(eth) + len) + error(1, errno, "sendmsg packet: %u", ret); +} + +static int recv_prepare_udp(void) +{ + int fd; + + fd = socket(cfg_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, 
errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF r"); + + if (cfg_family == PF_INET6) { + if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "bind r"); + } else { + if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "bind r"); + } + + return fd; +} + +/* Filter out all traffic that is not cfg_proto with our destination port. + * + * Otherwise background noise may cause PF_PACKET receive queue overflow, + * dropping the expected packets and failing the test. + */ +static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static void recv_prepare_packet_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + + if (cfg_family == AF_INET) + __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol), + sizeof(struct iphdr) + off_dport); + else + __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr), + sizeof(struct ipv6hdr) + off_dport); +} + +static void recv_prepare_packet_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + + if (cfg_family == PF_INET) + laddr.sll_protocol = htons(ETH_P_IP); + else + laddr.sll_protocol = htons(ETH_P_IPV6); + + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, 0, "if_nametoindex %s", cfg_ifname); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind pf_packet"); +} + +static int recv_prepare_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF p"); + + /* enable auxdata to recv checksum status (valid vs unknown) */ + if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one))) + error(1, errno, "setsockopt auxdata"); + + /* install filter to restrict packet flow to match */ + recv_prepare_packet_filter(fd); + + /* bind to address family to start packet flow */ + recv_prepare_packet_bind(fd); + + return fd; +} + +static int recv_udp(int fd) +{ + static char buf[MAX_PAYLOAD_LEN]; + int ret, count = 0; + + while (1) { + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv r"); + + fprintf(stderr, "rx: udp: len=%u\n", ret); + count++; + } + + return count; +} + +static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field) +{ + uint16_t csum; + + csum = checksum(th, cfg_proto, len); + + fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n", + sport, len, csum_field, csum); + + /* csum must be zero unless cfg_bad_csum indicates bad csum */ + if (csum && 
!cfg_bad_csum) { + fprintf(stderr, "pkt: bad csum\n"); + return 1; + } else if (cfg_bad_csum && !csum) { + fprintf(stderr, "pkt: good csum, while bad expected\n"); + return 1; + } + + if (cfg_zero_sum && csum_field != 0xFFFF) { + fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field); + return 1; + } + + return 0; +} + +static int recv_verify_packet_tcp(void *th, int len) +{ + struct tcphdr *tcph = th; + + if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst)) + return -1; + + return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check); +} + +static int recv_verify_packet_udp_encap(void *th, int len) +{ + struct udp_encap_hdr *eh = th; + + if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP) + return -1; + + return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh)); +} + +static int recv_verify_packet_udp(void *th, int len) +{ + struct udphdr *udph = th; + + if (len < sizeof(*udph)) + return -1; + + if (udph->dest != htons(cfg_port_dst)) + return -1; + + if (udph->source == htons(cfg_port_src_encap)) + return recv_verify_packet_udp_encap(udph + 1, + len - sizeof(*udph)); + + return recv_verify_csum(th, len, ntohs(udph->source), udph->check); +} + +static int recv_verify_packet_ipv4(void *nh, int len) +{ + struct iphdr *iph = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*iph) || iph->protocol != proto) + return -1; + + iph_addr_p = &iph->saddr; + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); + else + return recv_verify_packet_udp(iph + 1, len - sizeof(*iph)); +} + +static int recv_verify_packet_ipv6(void *nh, int len) +{ + struct ipv6hdr *ip6h = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) + return -1; + + iph_addr_p = &ip6h->saddr; + + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + else + return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); +} + +/* return whether auxdata includes TP_STATUS_CSUM_VALID */ +static bool recv_verify_packet_csum(struct msghdr *msg) +{ + struct tpacket_auxdata *aux = NULL; + struct cmsghdr *cm; + + if (msg->msg_flags & MSG_CTRUNC) + error(1, 0, "cmsg: truncated"); + + for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level != SOL_PACKET || + cm->cmsg_type != PACKET_AUXDATA) + error(1, 0, "cmsg: level=%d type=%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) + error(1, 0, "cmsg: len=%lu expected=%lu", + cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); + + aux = (void *)CMSG_DATA(cm); + } + + if (!aux) + error(1, 0, "cmsg: no auxdata"); + + return aux->tp_status & TP_STATUS_CSUM_VALID; +} + +static int recv_packet(int fd) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + unsigned long total = 0, bad_csums = 0, bad_validations = 0; + char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + struct pkt *buf = (void *)_buf; + struct msghdr msg = {0}; + struct iovec iov; + int len, ret; + + iov.iov_base = _buf; + iov.iov_len = sizeof(_buf); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + while (1) { + msg.msg_flags = 0; + + len = recvmsg(fd, &msg, MSG_DONTWAIT); + if (len == -1 && errno == EAGAIN) + break; + if (len == -1) + error(1, errno, "recv p"); + + if (cfg_family == PF_INET6) + ret = recv_verify_packet_ipv6(buf, len); + else + ret = 
recv_verify_packet_ipv4(buf, len); + + if (ret == -1 /* skip: non-matching */) + continue; + + total++; + if (ret == 1) + bad_csums++; + + /* Fail if kernel returns valid for known bad csum. + * Do not fail if kernel does not validate a good csum: + * Absence of validation does not imply invalid. + */ + if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); + bad_validations++; + } + } + + if (bad_csums || bad_validations) + error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n", + total, bad_csums, bad_validations); + + return total; +} + +static void parse_args(int argc, char *const argv[]) +{ + const char *daddr = NULL, *saddr = NULL; + int c; + + while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + break; + case '6': + cfg_family = PF_INET6; + break; + case 'd': + cfg_mac_dst = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'e': + cfg_encap = true; + break; + case 'E': + cfg_bad_csum = true; + break; + case 'i': + cfg_ifname = optarg; + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'L': + cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000; + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'r': + cfg_random_seed = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_send_pfpacket = true; + break; + case 'R': + /* only Rx: used with two machine tests */ + cfg_do_tx = false; + break; + case 's': + cfg_mac_src = optarg; + break; + case 'S': + saddr = optarg; + break; + case 't': + cfg_proto = IPPROTO_TCP; + break; + case 'T': + /* only Tx: used with two machine tests */ + cfg_do_rx = false; + break; + case 'u': + cfg_proto = IPPROTO_UDP; + break; + case 'U': + /* send using real udp socket, + * to exercise tx checksum offload + */ + cfg_send_udp = true; + break; + case 'z': + cfg_zero_disable = true; + break; + case 'Z': + cfg_zero_sum = true; + break; + default: + error(1, 0, "unknown arg %c", c); + } + } + + if (!daddr || !saddr) + error(1, 0, "Must pass -D and -S "); + + if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst)) + error(1, 0, "Transmit with pf_packet requires mac addresses"); + + if (cfg_payload_len > MAX_PAYLOAD_LEN) + error(1, 0, "Payload length exceeds max"); + + if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable)) + error(1, 0, "Only UDP supports zero csum"); + + if (cfg_zero_sum && !cfg_send_udp) + error(1, 0, "Zero checksum conversion requires -U for tx csum offload"); + if (cfg_zero_sum && cfg_bad_csum) + error(1, 0, "Cannot combine zero checksum conversion and invalid checksum"); + if (cfg_zero_sum && cfg_random_seed) + error(1, 0, "Cannot combine zero checksum conversion with randomization"); + + if (cfg_family == PF_INET6) { + cfg_saddr6.sin6_port = htons(cfg_port_src); + cfg_daddr6.sin6_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -S"); + } else { + cfg_saddr4.sin_port = htons(cfg_port_src); + cfg_daddr4.sin_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -S"); + } + + if (cfg_do_tx && cfg_random_seed) { + /* special 
case: time-based seed */
+	if (cfg_random_seed == 1)
+		cfg_random_seed = (unsigned int)gettimeofday_ms();
+	srand(cfg_random_seed);
+	fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
+	}
+}
+
+static void do_tx(void)
+{
+	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+	char *buf;
+	int fd, len, i;
+
+	buf = build_packet(_buf, sizeof(_buf), &len);
+
+	if (cfg_send_pfpacket)
+		fd = open_packet();
+	else if (cfg_send_udp)
+		fd = open_inet(SOCK_DGRAM, 0);
+	else
+		fd = open_inet(SOCK_RAW, IPPROTO_RAW);
+
+	for (i = 0; i < cfg_num_pkt; i++) {
+		if (cfg_send_pfpacket)
+			send_packet(fd, buf, len);
+		else
+			send_inet(fd, buf, len);
+
+		/* randomize each packet individually to increase coverage */
+		if (cfg_random_seed) {
+			cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
+			buf = build_packet(_buf, sizeof(_buf), &len);
+		}
+	}
+
+	if (close(fd))
+		error(1, errno, "close tx");
+}
+
+static void do_rx(int fdp, int fdr)
+{
+	unsigned long count_udp = 0, count_pkt = 0;
+	long tleft, tstop;
+	struct pollfd pfd;
+
+	tstop = gettimeofday_ms() + cfg_timeout_ms;
+	tleft = cfg_timeout_ms;
+
+	do {
+		pfd.events = POLLIN;
+		pfd.fd = fdp;
+		if (poll(&pfd, 1, tleft) == -1)
+			error(1, errno, "poll");
+
+		if (pfd.revents & POLLIN)
+			count_pkt += recv_packet(fdp);
+
+		if (cfg_proto == IPPROTO_UDP)
+			count_udp += recv_udp(fdr);
+
+		tleft = tstop - gettimeofday_ms();
+	} while (tleft > 0);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdp))
+		error(1, errno, "close p");
+
+	if (count_pkt < cfg_num_pkt)
+		error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
+		      count_pkt, cfg_num_pkt);
+
+	if (cfg_proto == IPPROTO_UDP) {
+		if (cfg_bad_csum && count_udp)
+			error(1, 0, "rx: unexpected packets at udp");
+		if (!cfg_bad_csum && !count_udp)
+			error(1, 0, "rx: missing packets at udp");
+	}
+}
+
+int main(int argc, char *const argv[])
+{
+	int fdp = -1, fdr = -1;	/* -1 to silence -Wmaybe-uninitialized */
+
+	parse_args(argc, argv);
+
+	/* open receive sockets before transmitting */
+	if (cfg_do_rx) {
+		fdp = recv_prepare_packet();
+		fdr = recv_prepare_udp();
+	}
+
+	if (cfg_do_tx)
+		do_tx();
+
+	if (cfg_do_rx)
+		do_rx(fdp, fdr);
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
-- cgit v1.2.3-58-ga151

From 052c82dcdcbb6eb89d0967c309c010cd293076d0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Mon, 28 Nov 2022 23:08:54 -0800
Subject: selftests/bpf: Use if_nametoindex instead of reading the
 /sys/class/net/*/ifindex

When switching netns, setns_by_fd() does a mount/umount dance with the
/sys directories. One reason is that the tc_redirect.c test depends on
/sys/class/net/*/ifindex instead of using if_nametoindex().
if_nametoindex() uses ioctl() to get the ifindex.

This patch moves all /sys/class/net/*/ifindex usages to
if_nametoindex(). The current code checks ifindex >= 0, which is
incorrect; ifindex > 0 should be checked instead. This patch also
stores ifindex_veth_src and ifindex_veth_dst since a later patch will
need them.
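For reference, the replacement pattern boils down to the following
sketch (error handling shortened; the test itself wraps the check in
the ASSERT_GT() macro):

	#include <net/if.h>

	unsigned int ifindex = if_nametoindex("veth_src");

	/* if_nametoindex() returns 0 on failure, so a valid ifindex is
	 * always > 0; a >= 0 check can never catch the error case.
	 */
	if (!ifindex)
		/* handle lookup failure */;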
Signed-off-by: Martin KaFai Lau
Signed-off-by: Daniel Borkmann
Acked-by: Stanislav Fomichev
Link: https://lore.kernel.org/bpf/20221129070900.3142427-2-martin.lau@linux.dev
---
 .../testing/selftests/bpf/prog_tests/tc_redirect.c | 46 +++++++++-------------
 1 file changed, 18 insertions(+), 28 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index cb6a53b3e023..2d85742efdd3 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,12 +11,12 @@
  */
 #include
-#include
 #include
 #include
 #include
 #include
 #include
+#include <net/if.h>
 #include
 #include
 #include
@@ -115,7 +115,9 @@ static void netns_setup_namespaces_nofail(const char *verb)
 }

 struct netns_setup_result {
+	int ifindex_veth_src;
 	int ifindex_veth_src_fwd;
+	int ifindex_veth_dst;
 	int ifindex_veth_dst_fwd;
 };

@@ -139,27 +141,6 @@ static int get_ifaddr(const char *name, char *ifaddr)
 	return 0;
 }

-static int get_ifindex(const char *name)
-{
-	char path[PATH_MAX];
-	char buf[32];
-	FILE *f;
-	int ret;
-
-	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
-	f = fopen(path, "r");
-	if (!ASSERT_OK_PTR(f, path))
-		return -1;
-
-	ret = fread(buf, 1, sizeof(buf), f);
-	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
-		fclose(f);
-		return -1;
-	}
-	fclose(f);
-	return atoi(buf);
-}
-
 #define SYS(fmt, ...)						\
 	({							\
 		char cmd[1024];					\
@@ -182,11 +163,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
 		goto fail;

-	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
-	if (result->ifindex_veth_src_fwd < 0)
+	result->ifindex_veth_src = if_nametoindex("veth_src");
+	if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
 		goto fail;
-	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
-	if (result->ifindex_veth_dst_fwd < 0)
+
+	result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
+	if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
+		goto fail;
+
+	result->ifindex_veth_dst = if_nametoindex("veth_dst");
+	if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+		goto fail;
+
+	result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
+	if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
 		goto fail;

 	SYS("ip link set veth_src netns " NS_SRC);
@@ -1034,8 +1024,8 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
 		goto fail;

-	ifindex = get_ifindex("tun_fwd");
-	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+	ifindex = if_nametoindex("tun_fwd");
+	if (!ASSERT_GT(ifindex, 0, "if_nametoindex tun_fwd"))
 		goto fail;

 	skel->rodata->IFINDEX_SRC = ifindex;
-- cgit v1.2.3-58-ga151

From 57d0863f1d2812da543ef49172f60d6aa14bedcf Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Mon, 28 Nov 2022 23:08:55 -0800
Subject: selftests/bpf: Avoid pinning bpf prog in the tc_redirect_dtime test

This patch removes the need to pin the prog in the tc_redirect_dtime
test by directly using bpf_tc_hook_create() and bpf_tc_attach(). The
clsact qdisc will go away together with the test netns, so no need to
do bpf_tc_hook_destroy().
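For orientation, the libbpf API used here reduces to roughly the
following sketch ('ifindex' and 'prog' stand in for the test's real
values):

	LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
		    .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
	LIBBPF_OPTS(bpf_tc_opts, tc_attach, .prog_fd = bpf_program__fd(prog));

	/* replaces "tc qdisc add dev ... clsact" */
	if (bpf_tc_hook_create(&hook))
		/* handle error */;

	/* replaces "tc filter add dev ... ingress bpf da ..." */
	hook.attach_point = BPF_TC_INGRESS;
	if (bpf_tc_attach(&hook, &tc_attach))
		/* handle error */;

The same pattern replaces the remaining pinned-prog "tc filter ...
object-pinned" invocations in the later patches of this series.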
Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221129070900.3142427-3-martin.lau@linux.dev --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 149 ++++++++++++++------- 1 file changed, 100 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 2d85742efdd3..690102f1ceda 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -250,6 +250,56 @@ fail: return -1; } +static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex) +{ + char err_str[128], ifname[16]; + int err; + + qdisc_hook->ifindex = ifindex; + qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + err = bpf_tc_hook_create(qdisc_hook); + snprintf(err_str, sizeof(err_str), + "qdisc add dev %s clsact", + if_indextoname(qdisc_hook->ifindex, ifname) ? : ""); + err_str[sizeof(err_str) - 1] = 0; + ASSERT_OK(err, err_str); + + return err; +} + +static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook, + enum bpf_tc_attach_point xgress, + const struct bpf_program *prog, int priority) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_attach); + char err_str[128], ifname[16]; + int err; + + qdisc_hook->attach_point = xgress; + tc_attach.prog_fd = bpf_program__fd(prog); + tc_attach.priority = priority; + err = bpf_tc_attach(qdisc_hook, &tc_attach); + snprintf(err_str, sizeof(err_str), + "filter add dev %s %s prio %d bpf da %s", + if_indextoname(qdisc_hook->ifindex, ifname) ? : "", + xgress == BPF_TC_INGRESS ? "ingress" : "egress", + priority, bpf_program__name(prog)); + err_str[sizeof(err_str) - 1] = 0; + ASSERT_OK(err, err_str); + + return err; +} + +#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \ + if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \ + goto fail; \ +}) + +#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \ + if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \ + goto fail; \ +}) + static int netns_load_bpf(void) { SYS("tc qdisc add dev veth_src_fwd clsact"); @@ -489,78 +539,79 @@ done: close(client_fd); } -static int netns_load_dtime_bpf(struct test_tc_dtime *skel) +static int netns_load_dtime_bpf(struct test_tc_dtime *skel, + const struct netns_setup_result *setup_result) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); struct nstoken *nstoken; - -#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file -#define PIN(__prog) ({ \ - int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \ - if (!ASSERT_OK(err, "pin " #__prog)) \ - goto fail; \ - }) + int err; /* setup ns_src tc progs */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - PIN(egress_host); - PIN(ingress_host); - SYS("tc qdisc add dev veth_src clsact"); - SYS("tc filter add dev veth_src ingress bpf da object-pinned " - PIN_FNAME(ingress_host)); - SYS("tc filter add dev veth_src egress bpf da object-pinned " - PIN_FNAME(egress_host)); + /* tc qdisc add dev veth_src clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); + /* tc filter add dev veth_src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev veth_src egress bpf da egress_host */ + 
XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - PIN(egress_host); - PIN(ingress_host); - SYS("tc qdisc add dev veth_dst clsact"); - SYS("tc filter add dev veth_dst ingress bpf da object-pinned " - PIN_FNAME(ingress_host)); - SYS("tc filter add dev veth_dst egress bpf da object-pinned " - PIN_FNAME(egress_host)); + /* tc qdisc add dev veth_dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); + /* tc filter add dev veth_dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev veth_dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - PIN(ingress_fwdns_prio100); - PIN(egress_fwdns_prio100); - PIN(ingress_fwdns_prio101); - PIN(egress_fwdns_prio101); - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio100)); - SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio101)); - SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio100)); - SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio101)); - SYS("tc qdisc add dev veth_src_fwd clsact"); - SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio100)); - SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned " - PIN_FNAME(ingress_fwdns_prio101)); - SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio100)); - SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned " - PIN_FNAME(egress_fwdns_prio101)); + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio100, 100); + /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio101, 101); + /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio100, 100); + /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio101, 101); + + /* tc qdisc add dev veth_src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); + /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio100, 100); + /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + skel->progs.ingress_fwdns_prio101, 101); + /* tc filter add dev veth_src_fwd egress prio 100 
bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio100, 100); + /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); - -#undef PIN - return 0; fail: close_netns(nstoken); - return -1; + return err; } enum { @@ -736,7 +787,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_dtime__load")) goto done; - if (netns_load_dtime_bpf(skel)) + if (netns_load_dtime_bpf(skel, setup_result)) goto done; nstoken = open_netns(NS_FWD); -- cgit v1.2.3-58-ga151 From f1b73577bb3c5b23872fde2907386635ea726d6d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 28 Nov 2022 23:08:56 -0800 Subject: selftests/bpf: Avoid pinning bpf prog in the tc_redirect_peer_l3 test This patch removes the need to pin prog in the tc_redirect_peer_l3 test by directly using the bpf_tc_hook_create() and bpf_tc_attach(). The clsact qdisc will go away together with the test netns, so no need to do bpf_tc_hook_destroy(). Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221129070900.3142427-4-martin.lau@linux.dev --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 32 ++++++++-------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 690102f1ceda..cefde6491749 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -1032,6 +1032,8 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1086,31 +1088,21 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_peer__load")) goto fail; - err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto fail; - - err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto fail; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto fail; - /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect packets * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. 
*/ - SYS("tc qdisc add dev tun_fwd clsact"); - SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " - SRC_PROG_PIN_FILE); + /* tc qdisc add dev tun_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); + /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ + XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " - DST_PROG_PIN_FILE); - SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); -- cgit v1.2.3-58-ga151 From 5dc42a7fc2866a56bc5616babace0a252458fe01 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 28 Nov 2022 23:08:57 -0800 Subject: selftests/bpf: Avoid pinning bpf prog in the netns_load_bpf() callers This patch removes the need to pin prog in the remaining tests in tc_redirect.c by directly using the bpf_tc_hook_create() and bpf_tc_attach(). The clsact qdisc will go away together with the test netns, so no need to do bpf_tc_hook_destroy(). Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221129070900.3142427-5-martin.lau@linux.dev --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 83 +++++++--------------- 1 file changed, 27 insertions(+), 56 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index cefde6491749..6f800381f924 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -59,10 +59,6 @@ #define IFADDR_STR_LEN 18 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" -#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" -#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" -#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" - #define TIMEOUT_MILLIS 10000 #define NSEC_PER_SEC 1000000000ULL @@ -300,19 +296,28 @@ static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook, goto fail; \ }) -static int netns_load_bpf(void) +static int netns_load_bpf(const struct bpf_program *src_prog, + const struct bpf_program *dst_prog, + const struct bpf_program *chk_prog, + const struct netns_setup_result *setup_result) { - SYS("tc qdisc add dev veth_src_fwd clsact"); - SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " - SRC_PROG_PIN_FILE); - SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); - - SYS("tc qdisc add dev veth_dst_fwd clsact"); - SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " - DST_PROG_PIN_FILE); - SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " - CHK_PROG_PIN_FILE); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + int err; + + /* tc qdisc add dev veth_src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); + /* tc filter add dev veth_src_fwd ingress 
bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + + /* tc qdisc add dev veth_dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); + /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -829,7 +834,6 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) { struct nstoken *nstoken = NULL; struct test_tc_neigh_fib *skel = NULL; - int err; nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns fwd")) @@ -842,19 +846,8 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; /* bpf_fib_lookup() checks if forwarding is enabled */ @@ -890,19 +883,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_neigh__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) @@ -937,19 +919,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK(err, "test_tc_peer__load")) goto done; - err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) - goto done; - - err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) - goto done; - - if (netns_load_bpf()) + if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, + skel->progs.tc_chk, setup_result)) goto done; if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) -- cgit v1.2.3-58-ga151 From 3084097c369c57c9bbd81e5bf47b36b6b27390fc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 28 Nov 2022 23:08:58 -0800 Subject: selftests/bpf: Remove the "/sys" mount and umount dance in {open,close}_netns The previous patches have removed the need to do the mount and umount dance when switching netns. 
In particular:

* Avoid remounting /sys/fs/bpf to have a clean start
* Avoid remounting /sys to get the ifindex of a particular netns

This patch can finally remove the mount and umount dance in
{open,close}_netns, which is unnecessarily complicated and error-prone.

Signed-off-by: Martin KaFai Lau
Signed-off-by: Daniel Borkmann
Acked-by: Stanislav Fomichev
Link: https://lore.kernel.org/bpf/20221129070900.3142427-6-martin.lau@linux.dev
---
 tools/testing/selftests/bpf/network_helpers.c | 51 +++------------------------
 1 file changed, 5 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 1f37adff7632..01de33191226 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -390,49 +390,6 @@ struct nstoken {
 	int orig_netns_fd;
 };

-static int setns_by_fd(int nsfd)
-{
-	int err;
-
-	err = setns(nsfd, CLONE_NEWNET);
-	close(nsfd);
-
-	if (!ASSERT_OK(err, "setns"))
-		return err;
-
-	/* Switch /sys to the new namespace so that e.g. /sys/class/net
-	 * reflects the devices in the new namespace.
-	 */
-	err = unshare(CLONE_NEWNS);
-	if (!ASSERT_OK(err, "unshare"))
-		return err;
-
-	/* Make our /sys mount private, so the following umount won't
-	 * trigger the global umount in case it's shared.
-	 */
-	err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
-	if (!ASSERT_OK(err, "remount private /sys"))
-		return err;
-
-	err = umount2("/sys", MNT_DETACH);
-	if (!ASSERT_OK(err, "umount2 /sys"))
-		return err;
-
-	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
-	if (!ASSERT_OK(err, "mount /sys"))
-		return err;
-
-	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
-	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
-		return err;
-
-	err = mount("debugfs", "/sys/kernel/debug", "debugfs", 0, NULL);
-	if (!ASSERT_OK(err, "mount /sys/kernel/debug"))
-		return err;
-
-	return 0;
-}
-
 struct nstoken *open_netns(const char *name)
 {
 	int nsfd;
@@ -453,8 +410,9 @@
 	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
 		goto fail;

-	err = setns_by_fd(nsfd);
-	if (!ASSERT_OK(err, "setns_by_fd"))
+	err = setns(nsfd, CLONE_NEWNET);
+	close(nsfd);
+	if (!ASSERT_OK(err, "setns"))
 		goto fail;

 	return token;
@@ -465,6 +423,7 @@ fail:

 void close_netns(struct nstoken *token)
 {
-	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+	ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns");
+	close(token->orig_netns_fd);
 	free(token);
 }
-- cgit v1.2.3-58-ga151

From 9b6a7773973707bad13f6c8b2a27e93ef4d46182 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Mon, 28 Nov 2022 23:08:59 -0800
Subject: selftests/bpf: Remove serial from tests using {open,close}_netns

After removing the mount/umount dance from {open,close}_netns() in the
previous patch, "serial_" can be removed from the tests using
{open,close}_netns().
Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221129070900.3142427-7-martin.lau@linux.dev --- tools/testing/selftests/bpf/prog_tests/empty_skb.c | 2 +- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +- tools/testing/selftests/bpf/prog_tests/test_tunnel.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 0613f3bb8b5e..32dd731e9070 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -9,7 +9,7 @@ goto out; \ }) -void serial_test_empty_skb(void) +void test_empty_skb(void) { LIBBPF_OPTS(bpf_test_run_opts, tattr); struct empty_skb *bpf_obj = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 6f800381f924..bca5e6839ac4 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -1138,7 +1138,7 @@ static void *test_tc_redirect_run_tests(void *arg) return NULL; } -void serial_test_tc_redirect(void) +void test_tc_redirect(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index eea274110267..07ad457f3370 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -421,7 +421,7 @@ static void *test_tunnel_run_tests(void *arg) return NULL; } -void serial_test_tunnel(void) +void test_tunnel(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 9ac6f6a268db..a50971c6cf4a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -85,7 +85,7 @@ static void test_max_pkt_size(int fd) } #define NUM_PKTS 10000 -void serial_test_xdp_do_redirect(void) +void test_xdp_do_redirect(void) { int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst; char data[sizeof(pkt_udp) + sizeof(__u32)]; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index 13daa3746064..c72083885b6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -174,7 +174,7 @@ out: system("ip netns del synproxy"); } -void serial_test_xdp_synproxy(void) +void test_xdp_synproxy(void) { if (test__start_subtest("xdp")) test_synproxy(true); -- cgit v1.2.3-58-ga151 From 443f216448ab5ddd1b4d08ad6c9b69628ac25adf Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 28 Nov 2022 23:09:00 -0800 Subject: selftests/bpf: Avoid pinning prog when attaching to tc ingress in btf_skc_cls_ingress This patch removes the need to pin prog when attaching to tc ingress in the btf_skc_cls_ingress test. Instead, directly use the bpf_tc_hook_create() and bpf_tc_attach(). The qdisc clsact will go away together with the netns, so no need to bpf_tc_hook_destroy(). 
Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20221129070900.3142427-8-martin.lau@linux.dev --- .../selftests/bpf/prog_tests/btf_skc_cls_ingress.c | 25 +++++++++------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 7a277035c275..ef4d6a3ae423 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -20,10 +21,12 @@ static struct test_btf_skc_cls_ingress *skel; static struct sockaddr_in6 srv_sa6; static __u32 duration; -#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" - static int prepare_netns(void) { + LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach, + .prog_fd = bpf_program__fd(skel->progs.cls_ingress)); + if (CHECK(unshare(CLONE_NEWNET), "create netns", "unshare(CLONE_NEWNET): %s (%d)", strerror(errno), errno)) @@ -33,12 +36,12 @@ static int prepare_netns(void) "ip link set dev lo up", "failed\n")) return -1; - if (CHECK(system("tc qdisc add dev lo clsact"), - "tc qdisc add dev lo clsact", "failed\n")) + qdisc_lo.ifindex = if_nametoindex("lo"); + if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) return -1; - if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE), - "install tc cls-prog at ingress", "failed\n")) + if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), + "filter add dev lo ingress")) return -1; /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the @@ -195,19 +198,12 @@ static struct test tests[] = { void test_btf_skc_cls_ingress(void) { - int i, err; + int i; skel = test_btf_skc_cls_ingress__open_and_load(); if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) return; - err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE); - if (CHECK(err, "bpf_program__pin", - "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) { - test_btf_skc_cls_ingress__destroy(skel); - return; - } - for (i = 0; i < ARRAY_SIZE(tests); i++) { if (!test__start_subtest(tests[i].desc)) continue; @@ -221,6 +217,5 @@ void test_btf_skc_cls_ingress(void) reset_test(); } - bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE); test_btf_skc_cls_ingress__destroy(skel); } -- cgit v1.2.3-58-ga151 From b42693415b86f608049cf1b4870adc1dc65e58b0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 30 Nov 2022 12:00:12 -0800 Subject: libbpf: Avoid enum forward-declarations in public API in C++ mode C++ enum forward declarations are fundamentally not compatible with pure C enum definitions, and so libbpf's use of `enum bpf_stats_type;` forward declaration in libbpf/bpf.h public API header is causing C++ compilation issues. 
More details can be found in [0], but it comes down to C++ supporting
enum forward declaration only with an explicitly specified backing
type:

  enum bpf_stats_type: int;

In C (and I believe it's a GCC extension also), such a forward
declaration is simply:

  enum bpf_stats_type;

Further, in Linux UAPI this enum is defined in a pure C way:

  enum bpf_stats_type {
	BPF_STATS_RUN_TIME = 0,
  };

And even though in both cases the backing type is int, which can be
confirmed by looking at the DWARF information, for a C++ compiler the
actual enum definition and the forward declaration are incompatible.

To eliminate this problem, for C++ mode define the input argument as
int, which makes the enum unnecessary in the libbpf public header.
This solves the issue and, as demonstrated by the next patch, doesn't
cause any unwanted compiler warnings, at least with the default
warning settings.

  [0] https://stackoverflow.com/questions/42766839/c11-enum-forward-causes-underlying-type-mismatch
  [1] Closes: https://github.com/libbpf/libbpf/issues/249

Signed-off-by: Andrii Nakryiko
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20221130200013.2997831-1-andrii@kernel.org
---
 tools/lib/bpf/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index a112e0ed1b19..7468978d3c27 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -409,8 +409,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);

+#ifdef __cplusplus
+/* forward-declaring enums in C++ isn't compatible with pure C enums, so
+ * instead define bpf_enable_stats() as accepting int as an input
+ */
+LIBBPF_API int bpf_enable_stats(int type);
+#else
 enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
 LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+#endif

 struct bpf_prog_bind_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
-- cgit v1.2.3-58-ga151

From f8186bf65ae6a4ce96d5cf52c2c9481c0e2193ce Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 30 Nov 2022 12:00:13 -0800
Subject: selftests/bpf: Make sure enum-less bpf_enable_stats() API works in
 C++ mode

Just a simple test to make sure we don't introduce unwanted compiler
warnings and that the API still supports passing enums as the input
argument.
Signed-off-by: Andrii Nakryiko
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20221130200013.2997831-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_cpp.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index 19ad172036da..0bd9990e83fa 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 #include
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include
+#include
+#include
 #include
-#pragma GCC diagnostic pop
 #include
 #include
 #include "test_core_extern.skel.h"
@@ -99,6 +99,7 @@ int main(int argc, char *argv[])
 	struct btf_dump_opts opts = { };
 	struct test_core_extern *skel;
 	struct btf *btf;
+	int fd;

 	try_skeleton_template();

@@ -117,6 +118,12 @@
 	skel = test_core_extern__open_and_load();
 	test_core_extern__destroy(skel);

+	fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (fd < 0)
+		std::cout << "FAILED to enable stats: " << fd << std::endl;
+	else
+		::close(fd);
+
 	std::cout << "DONE!" << std::endl;

 	return 0;
-- cgit v1.2.3-58-ga151

From 7d7cfb48d81353e826493d24c7cec7360950968f Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Tue, 22 Nov 2022 16:00:09 +0100
Subject: netfilter: conntrack: set icmpv6 redirects as RELATED

icmp conntrack will set icmp redirects as RELATED, but icmpv6 will not
do this. For icmpv6, only icmp errors (type < 128) are examined for
RELATED state.

ICMPv6 redirects are part of the neighbour discovery mechanism; those
are handled by marking a selected subset (e.g. neighbour solicitations)
as UNTRACKED, but not REDIRECT -- they will thus be flagged as INVALID.

Add minimal support for REDIRECTs. No parsing of neighbour options is
added for simplicity, so this will only check that we have the embedded
original header (ND_OPT_REDIRECT_HDR), and then attempt to do a flow
lookup for this tuple.

Also extend the existing test case to cover redirects.
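For orientation, the redirect message being validated has this layout
(struct rd_msg, as defined in include/net/ndisc.h):

	struct rd_msg {
		struct icmp6hdr	icmph;
		struct in6_addr	target;
		struct in6_addr	dest;
		__u8		opt[];	/* ND options, e.g. redirected header */
	};

Only the option type and length are sanity checked; the embedded
original header that follows the 8 byte ND_OPT_REDIRECT_HDR option
header is handed to nf_conntrack_inet_error() for the flow lookup.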
Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Reported-by: Eric Garver Link: https://github.com/firewalld/firewalld/issues/1046 Signed-off-by: Florian Westphal Acked-by: Eric Garver Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_icmpv6.c | 53 ++++++++++++++++++++++ .../selftests/netfilter/conntrack_icmp_related.sh | 36 ++++++++++++++- 2 files changed, 87 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 61e3b05cf02c..1020d67600a9 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -129,6 +129,56 @@ static void icmpv6_error_log(const struct sk_buff *skb, nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg); } +static noinline_for_stack int +nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state) +{ + u8 hl = ipv6_hdr(skb)->hop_limit; + union nf_inet_addr outer_daddr; + union { + struct nd_opt_hdr nd_opt; + struct rd_msg rd_msg; + } tmp; + const struct nd_opt_hdr *nd_opt; + const struct rd_msg *rd_msg; + + rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg); + if (!rd_msg) { + icmpv6_error_log(skb, state, "short redirect"); + return -NF_ACCEPT; + } + + if (rd_msg->icmph.icmp6_code != 0) + return NF_ACCEPT; + + if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { + icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect"); + return -NF_ACCEPT; + } + + dataoff += sizeof(*rd_msg); + + /* warning: rd_msg no longer usable after this call */ + nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt); + if (!nd_opt || nd_opt->nd_opt_len == 0) { + icmpv6_error_log(skb, state, "redirect without options"); + return -NF_ACCEPT; + } + + /* We could call ndisc_parse_options(), but it would need + * skb_linearize() and a bit more work. + */ + if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR) + return NF_ACCEPT; + + memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, + sizeof(outer_daddr.ip6)); + dataoff += 8; + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMPV6, &outer_daddr); +} + int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -159,6 +209,9 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, return NF_ACCEPT; } + if (icmp6h->icmp6_type == NDISC_REDIRECT) + return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state); + /* is not error message ? */ if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh index b48e1833bc89..76645aaf2b58 100755 --- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -35,6 +35,8 @@ cleanup() { for i in 1 2;do ip netns del nsrouter$i;done } +trap cleanup EXIT + ipv4() { echo -n 192.168.$1.2 } @@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - < /dev/null + +expect="packets 1 bytes 112" +check_counter nsclient1 "redir4" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null +expect="packets 1 bytes 192" +check_counter nsclient1 "redir6" "$expect" +if [ $? 
-ne 0 ];then
+	ret=1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: icmp redirects had RELATED state"
+else
+	echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
 exit $ret
-- cgit v1.2.3-58-ga151

From 996c060e2bb90e5caef42849846b56da21ea88d9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 30 Nov 2022 23:20:49 +0100
Subject: selftests/bpf: Add bench test to arm64 and s390x denylist

BPF CI fails for arm64 and s390x each with the following result:

  [...]
  All error logs:

  serial_test_kprobe_multi_bench_attach:PASS:get_syms 0 nsec
  serial_test_kprobe_multi_bench_attach:PASS:kprobe_multi_empty__open_and_load 0 nsec
  libbpf: prog 'test_kprobe_empty': failed to attach: Operation not supported
  serial_test_kprobe_multi_bench_attach:FAIL:bpf_program__attach_kprobe_multi_opts unexpected error: -95
  #92 kprobe_multi_bench_attach:FAIL
  [...]

Add the test to the deny list.

Fixes: 5b6c7e5c4434 ("selftests/bpf: Add attach bench test")
Signed-off-by: Daniel Borkmann
---
 tools/testing/selftests/bpf/DENYLIST.aarch64 | 1 +
 tools/testing/selftests/bpf/DENYLIST.s390x   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 8e77515d56f6..99cc33c51eaa 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -28,6 +28,7 @@ kfree_skb                                        # attach fentry unexpected erro
 kfunc_call/subprog                               # extern (var ksym) 'bpf_prog_active': not found in kernel BTF
 kfunc_call/subprog_lskel                         # skel unexpected error: -2
 kfunc_dynptr_param/dynptr_data_null              # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22
+kprobe_multi_bench_attach                        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test/attach_api_addrs               # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test/attach_api_pattern             # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test/attach_api_syms                # bpf_program__attach_kprobe_multi_opts unexpected error: -95
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 648a8a1b6b78..3481f3a5ea6f 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -29,6 +29,7 @@ htab_update                              # failed to attach: ERROR: strerror_r(-
 kfree_skb                                # attach fentry unexpected error: -524 (trampoline)
 kfunc_call                               # 'bpf_prog_active': not found in kernel BTF (?)
 kfunc_dynptr_param                       # JIT does not support calling kernel function (kfunc)
+kprobe_multi_bench_attach                # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test                        # relies on fentry
 ksyms_module                             # test_ksyms_module__open_and_load unexpected error: -9 (?)
 ksyms_module_libbpf                      # JIT does not support calling kernel function (kfunc)
-- cgit v1.2.3-58-ga151

From c67cae551f0df80421b5703ee56ff5e2fe9c4de6 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Fri, 25 Nov 2022 14:06:17 -0800
Subject: bpf: Tighten ptr_to_btf_id checks.

Networking programs typically don't require CAP_PERFMON, but through
kfuncs like bpf_cast_to_kern_ctx() they can access memory through
PTR_TO_BTF_ID. In such cases, enforce CAP_PERFMON. Also make sure that
only GPL programs can access kernel data structures. All kfuncs require
GPL already.

Also remove allow_ptr_to_map_access. It's the same as allow_ptr_leaks,
and a different name for the same check only causes confusion.
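As an illustration (not part of this patch; vmlinux.h and bpf_helpers.h
includes omitted), a minimal sketch of an affected program, which now
must be GPL and loaded with CAP_PERFMON to pass verification:

	extern void *bpf_cast_to_kern_ctx(void *obj) __ksym;

	SEC("tc")
	int cast_and_read(struct __sk_buff *ctx)
	{
		/* returns a PTR_TO_BTF_ID pointer to the kernel ctx */
		struct sk_buff *kskb = bpf_cast_to_kern_ctx(ctx);

		return kskb->len;
	}

	char _license[] SEC("license") = "GPL";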
Fixes: fd264ca02094 ("bpf: Add a kfunc to type cast from bpf uapi ctx to kernel ctx") Fixes: 50c6b8a9aea2 ("selftests/bpf: Add a test for btf_type_tag "percpu"") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221125220617.26846-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 5 ----- include/linux/bpf_verifier.h | 1 - kernel/bpf/verifier.c | 17 ++++++++++++++--- tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c | 1 + tools/testing/selftests/bpf/verifier/map_ptr.c | 8 ++++---- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 67452103bb86..4920ac252754 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1909,11 +1909,6 @@ static inline bool bpf_allow_uninit_stack(void) return perfmon_capable(); } -static inline bool bpf_allow_ptr_to_map_access(void) -{ - return perfmon_capable(); -} - static inline bool bpf_bypass_spec_v1(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c05aa6e1f6f5..b5090e89cb3f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -531,7 +531,6 @@ struct bpf_verifier_env { bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; - bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4e7f1d085e53..d0ecc0b18b20 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4703,6 +4703,18 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, u32 btf_id; int ret; + if (!env->allow_ptr_leaks) { + verbose(env, + "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", + tname); + return -EPERM; + } + if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) { + verbose(env, + "Cannot access kernel 'struct %s' from non-GPL compatible program\n", + tname); + return -EINVAL; + } if (off < 0) { verbose(env, "R%d is ptr_%s invalid negative access: off=%d\n", @@ -4823,9 +4835,9 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id); tname = btf_name_by_offset(btf_vmlinux, t->name_off); - if (!env->allow_ptr_to_map_access) { + if (!env->allow_ptr_leaks) { verbose(env, - "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", + "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", tname); return -EPERM; } @@ -16679,7 +16691,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->allow_ptr_leaks = bpf_allow_ptr_leaks(); env->allow_uninit_stack = bpf_allow_uninit_stack(); - env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c index 8feddb8289cf..38f78d9345de 100644 --- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c +++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c @@ -64,3 +64,4 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path) return 0; } +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index 1f82021429bf..17ee84dc7766 100644 --- 
a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -9,7 +9,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "R1 is bpf_array invalid negative access: off=-8", }, @@ -26,7 +26,7 @@ }, .fixup_map_array_48b = { 3 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "only read from bpf_array is supported", }, @@ -41,7 +41,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = REJECT, .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -57,7 +57,7 @@ }, .fixup_map_array_48b = { 1 }, .result_unpriv = REJECT, - .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", .result = ACCEPT, .retval = 1, }, -- cgit v1.2.3-58-ga151 From 89903dcb3c2e134fb101de7921a19dd9f8418b4c Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 29 Nov 2022 18:40:41 +0800 Subject: selftests/bpf: Add ingress tests for txmsg with apply_bytes Currently, the ingress redirect is not covered in "txmsg test apply". Signed-off-by: Pengcheng Yang Signed-off-by: Daniel Borkmann Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/1669718441-2654-5-git-send-email-yangpc@wangsu.com --- tools/testing/selftests/bpf/test_sockmap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e768181a1bd7..024a0faafb3b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1690,24 +1690,42 @@ static void test_txmsg_apply(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; txmsg_redir = 0; + txmsg_ingress = 0; txmsg_apply = 1; txmsg_cork = 0; test_send_one(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; + txmsg_ingress = 0; + txmsg_apply = 1; + txmsg_cork = 0; + test_send_one(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_ingress = 1; txmsg_apply = 1; txmsg_cork = 0; test_send_one(opt, cgrp); txmsg_pass = 1; txmsg_redir = 0; + txmsg_ingress = 0; + txmsg_apply = 1024; + txmsg_cork = 0; + test_send_large(opt, cgrp); + + txmsg_pass = 0; + txmsg_redir = 1; + txmsg_ingress = 0; txmsg_apply = 1024; txmsg_cork = 0; test_send_large(opt, cgrp); txmsg_pass = 0; txmsg_redir = 1; + txmsg_ingress = 1; txmsg_apply = 1024; txmsg_cork = 0; test_send_large(opt, cgrp); -- cgit v1.2.3-58-ga151 From 78b037bd402df8eca0f45ef003c6d0ab25a26ecc Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 1 Dec 2022 10:34:06 -0800 Subject: selftests/bpf: Validate multiple ref release_on_unlock logic Modify list_push_pop_multiple to alloc and insert nodes 2-at-a-time. 
Without the previous patch's fix, this block of code: bpf_spin_lock(lock); bpf_list_push_front(head, &f[i]->node); bpf_list_push_front(head, &f[i + 1]->node); bpf_spin_unlock(lock); would fail check_reference_leak check as release_on_unlock logic would miss a ref that should've been released. Signed-off-by: Dave Marchevsky cc: Kumar Kartikeya Dwivedi Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221201183406.1203621-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/linked_list.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 2c7b615c6d41..4ad88da5cda2 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -99,13 +99,28 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea struct foo *f[8], *pf; int i; - for (i = 0; i < ARRAY_SIZE(f); i++) { + /* Loop following this check adds nodes 2-at-a-time in order to + * validate multiple release_on_unlock release logic + */ + if (ARRAY_SIZE(f) % 2) + return 10; + + for (i = 0; i < ARRAY_SIZE(f); i += 2) { f[i] = bpf_obj_new(typeof(**f)); if (!f[i]) return 2; f[i]->data = i; + + f[i + 1] = bpf_obj_new(typeof(**f)); + if (!f[i + 1]) { + bpf_obj_drop(f[i]); + return 9; + } + f[i + 1]->data = i + 1; + bpf_spin_lock(lock); bpf_list_push_front(head, &f[i]->node); + bpf_list_push_front(head, &f[i + 1]->node); bpf_spin_unlock(lock); } -- cgit v1.2.3-58-ga151 From b4e0df4cafe112220c19d30439732d22289147f1 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 30 Nov 2022 15:06:23 +0100 Subject: selftests: mptcp: run mptcp_inq from a clean netns A new "sandbox" net namespace is available where no other netfilter rules have been added. Use this new netns instead of re-using "ns1" and clean it. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 80d36f7cfee8..8a83100b212e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -282,7 +282,7 @@ run_tests() do_tcpinq_test() { - ip netns exec "$ns1" ./mptcp_inq "$@" + ip netns exec "$ns_sbox" ./mptcp_inq "$@" lret=$? if [ $lret -ne 0 ];then ret=$lret @@ -298,9 +298,6 @@ do_tcpinq_tests() { local lret=0 - ip netns exec "$ns1" iptables -F - ip netns exec "$ns1" ip6tables -F - for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args lret=$? -- cgit v1.2.3-58-ga151 From b71dd705179cfd493b17cdf67f90b19ccfe45069 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 30 Nov 2022 15:06:24 +0100 Subject: selftests: mptcp: removed defined but unused vars Some variables were set but never used. This was not causing any issues except adding some confusion and having shellcheck complaining about them. 
Reviewed-by: Mat Martineau
Signed-off-by: Matthieu Baerts
Signed-off-by: Jakub Kicinski
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 ---
 tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 1 -
 tools/testing/selftests/net/mptcp/simult_flows.sh  | 3 ---
 3 files changed, 7 deletions(-)
(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 60198b91a530..63b722b505e5 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -781,7 +781,6 @@ run_tests_mptfo()
 run_tests_disconnect()
 {
-	local peekmode="$1"
 	local old_cin=$cin
 	local old_sin=$sin
@@ -789,7 +788,6 @@ run_tests_disconnect()
 	# force do_transfer to cope with the multiple transmissions
 	sin="$cin.disconnect"
-	sin_disconnect=$old_sin
 	cin="$cin.disconnect"
 	cin_disconnect="$old_cin"
 	connect_per_transfer=3
@@ -800,7 +798,6 @@ run_tests_disconnect()
 	# restore previous status
 	sin=$old_sin
-	sin_disconnect="$cout".disconnect
 	cin=$old_cin
 	cin_disconnect="$cin".disconnect
 	connect_per_transfer=1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 8a83100b212e..6e8f4599cc44 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -10,7 +10,6 @@ ksft_skip=4
 timeout_poll=30
 timeout_test=$((timeout_poll * 2 + 1))
 mptcp_connect=""
-do_all_tests=1
 
 add_mark_rules()
 {
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index a5aeefd58ab3..189a664aed81 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -148,9 +148,6 @@ do_transfer()
 	:> "$sout"
 	:> "$capout"
 
-	local addr_port
-	addr_port=$(printf "%s:%d" ${connect_addr} ${port})
-
 	if $capture; then
 		local capuser
 		if [ -z $SUDO_USER ] ; then
-- 
cgit v1.2.3-58-ga151


From 787eb1e4df93f469e932ef686dd9dc767a9a2392 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts
Date: Wed, 30 Nov 2022 15:06:24 +0100
Subject: selftests: mptcp: uniform 'rndh' variable

The definition of 'rndh' was probably copied from one script to another but sometimes, 'sec' was not defined, not used and/or not spelled properly. Here, all the 'rndh' variables are now defined the same way.
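To make the convention concrete, the uniform definition combines the epoch time in hex with a random mktemp suffix, so every run gets unique namespace names; a short sketch (the example value is made up):

    sec=$(date +%s)
    rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
    # e.g. rndh="638f1a2b-Hx4Kq2", giving per-run namespaces such as
    # "ns1-638f1a2b-Hx4Kq2" that cannot collide with concurrent or
    # leftover test runs
    ns1="ns1-$rndh"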
Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 1 + tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 +-- tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 2 ++ tools/testing/selftests/net/mptcp/simult_flows.sh | 1 + tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 515859a5168b..24bcd7b9bdb2 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 63b722b505e5..a43d3e2f59bb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -274,8 +274,7 @@ check_transfer() check_mptcp_disabled() { - local disabled_ns - disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" + local disabled_ns="ns_disabled-$rndh" ip netns add ${disabled_ns} || exit $ksft_skip # net.mptcp.enabled should be enabled by default diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2eeaf4aca644..2a402b3b771f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -59,8 +59,9 @@ init_partial() { capout=$(mktemp) - local rndh - rndh=$(mktemp -u XXXXXX) + local sec rndh + sec=$(date +%s) + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 6e8f4599cc44..dbee386450f3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -30,6 +30,8 @@ add_mark_rules() init() { + local sec rndh + sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 189a664aed81..9f22f7e5027d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3229725b64b0..5dfc3ee74b98 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -33,7 +33,7 @@ client_addr_id=${RANDOM:0:2} server_addr_id=${RANDOM:0:2} sec=$(date +%s) -rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) +rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" -- cgit v1.2.3-58-ga151 From de2392028a19e6841cd0ef14b9c832a7a2bc3a69 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 30 Nov 2022 15:06:26 +0100 Subject: selftests: mptcp: clearly declare global ns vars It is clearer to declare these global variables at the beginning of the file as it is done in other MPTCP selftests rather 
than in functions in the middle of the script. So for uniformity reason, we can do the same here in mptcp_sockopt.sh. Suggested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index dbee386450f3..f74b237bcb32 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -11,6 +11,12 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns_sbox="ns_sbox-$rndh" + add_mark_rules() { local ns=$1 @@ -30,14 +36,6 @@ add_mark_rules() init() { - local sec rndh - sec=$(date +%s) - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" - ns_sbox="ns_sbox-$rndh" - for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up -- cgit v1.2.3-58-ga151 From 5f17f8e315ad77c7ba45afe169b4598efd5d5bc3 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 30 Nov 2022 15:06:27 +0100 Subject: selftests: mptcp: declare var as local Just to avoid classical Bash pitfall where variables are accidentally overridden by other functions because the proper scope has not been defined. That's also what is done in other MPTCP selftests scripts where all non local variables are defined at the beginning of the script and the others are defined with the "local" keyword. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 51 ++++++++++++---------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index f74b237bcb32..1b70c0a304ce 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -22,6 +22,7 @@ add_mark_rules() local ns=$1 local m=$2 + local t for t in iptables ip6tables; do # just to debug: check we have multiple subflows connection requests ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT @@ -36,6 +37,7 @@ add_mark_rules() init() { + local netns for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up @@ -44,6 +46,7 @@ init() ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 done + local i for i in `seq 1 4`; do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i @@ -73,6 +76,7 @@ init() cleanup() { + local netns for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done @@ -103,15 +107,17 @@ check_mark() local ns=$1 local af=$2 - tables=iptables + local tables=iptables if [ $af -eq 6 ];then tables=ip6tables fi + local counters values counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP) values=${counters%DROP*} + local v for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 @@ -131,9 +137,9 @@ print_file_err() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 cmp 
"$in" "$out" > /dev/null 2>&1 if [ $? -ne 0 ] ;then @@ -156,18 +162,18 @@ is_v6() do_transfer() { - listener_ns="$1" - connector_ns="$2" - cl_proto="$3" - srv_proto="$4" - connect_addr="$5" + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" - port=12001 + local port=12001 :> "$cout" :> "$sout" - mptcp_connect="./mptcp_connect -r 20" + local mptcp_connect="./mptcp_connect -r 20" local local_addr if is_v6 "${connect_addr}"; then @@ -180,7 +186,7 @@ do_transfer() ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ ${local_addr} < "$sin" > "$sout" & - spid=$! + local spid=$! sleep 1 @@ -189,12 +195,12 @@ do_transfer() $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ $connect_addr < "$cin" > "$cout" & - cpid=$! + local cpid=$! wait $cpid - retc=$? + local retc=$? wait $spid - rets=$? + local rets=$? if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 @@ -229,9 +235,9 @@ do_transfer() make_file() { - name=$1 - who=$2 - size=$3 + local name=$1 + local who=$2 + local size=$3 dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" @@ -264,9 +270,9 @@ do_mptcp_sockopt_tests() run_tests() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" local lret=0 do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} @@ -282,7 +288,7 @@ run_tests() do_tcpinq_test() { ip netns exec "$ns_sbox" ./mptcp_inq "$@" - lret=$? + local lret=$? if [ $lret -ne 0 ];then ret=$lret echo "FAIL: mptcp_inq $@" 1>&2 @@ -297,6 +303,7 @@ do_tcpinq_tests() { local lret=0 + local args for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args lret=$? -- cgit v1.2.3-58-ga151 From 7dff74f5716edf2f0288fe4cdcb4feeeb6a9383b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Nov 2022 15:06:29 +0100 Subject: selftests: mptcp: enhance userspace pm tests Some userspace pm tests failed since pm listener events have been added. Now MPTCP_EVENT_LISTENER_CREATED event becomes the first item in the events list like this: type:15,family:2,sport:10006,saddr4:0.0.0.0 type:1,token:3701282876,server_side:1,family:2,saddr4:10.0.1.1,... And no token value in this MPTCP_EVENT_LISTENER_CREATED event. This patch fixes this by specifying the type 1 item to search for token values. 
Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 ++- tools/testing/selftests/net/mptcp/userspace_pm.sh | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2a402b3b771f..f10ef65a7009 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -830,7 +830,8 @@ do_transfer() if [ $userspace_pm -eq 0 ]; then pm_nl_add_endpoint $ns1 $addr flags signal else - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 5dfc3ee74b98..08a88ea47a29 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -172,9 +172,10 @@ make_connection() client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") kill_wait $server_evts_pid - server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") - server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$server_evts") + server_token=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + server_serverside=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') rm -f "$client_evts" "$server_evts" "$file" if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && -- cgit v1.2.3-58-ga151 From 1cc94ac1af4b18c69981425df6f0355f13d9304d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Nov 2022 15:06:30 +0100 Subject: selftests: mptcp: make evts global in userspace_pm This patch makes server_evts and client_evts global in userspace_pm.sh, then these two variables could be used in test_announce(), test_remove() and test_subflows(). The local variable 'evts' in these three functions then could be dropped. Also move local variable 'file' as a global one. 
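The resulting allocate-once/reuse pattern for the event files looks roughly like this, condensed from the diff below:

    if [ -z "$server_evts" ]; then
        server_evts=$(mktemp)       # allocate once, globally
    fi
    :>"$server_evts"                # truncate before each capture
    if [ $server_evts_pid -ne 0 ]; then
        kill_wait $server_evts_pid  # stop a previous monitor, if any
    fi
    ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
    server_evts_pid=$!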
Suggested-by: Paolo Abeni Signed-off-by: Geliang Tang Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 213 ++++++++++------------ 1 file changed, 93 insertions(+), 120 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 08a88ea47a29..91d6f30e2fe4 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -15,7 +15,11 @@ SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED AF_INET=2 AF_INET6=10 -evts_pid=0 +file="" +server_evts="" +client_evts="" +server_evts_pid=0 +client_evts_pid=0 client4_pid=0 server4_pid=0 client6_pid=0 @@ -47,7 +51,7 @@ cleanup() { echo "cleanup" - rm -rf $file + rm -rf $file $client_evts $server_evts # Terminate the MPTCP connection and related processes if [ $client4_pid -ne 0 ]; then @@ -62,8 +66,11 @@ cleanup() if [ $server6_pid -ne 0 ]; then kill_wait $server6_pid fi - if [ $evts_pid -ne 0 ]; then - kill_wait $evts_pid + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid fi local netns for netns in "$ns1" "$ns2" ;do @@ -113,8 +120,9 @@ make_file() make_connection() { - local file - file=$(mktemp) + if [ -z "$file" ]; then + file=$(mktemp) + fi make_file "$file" "client" local is_v6=$1 @@ -132,16 +140,24 @@ make_connection() # Capture netlink events over the two network namespaces running # the MPTCP client and server - local client_evts - client_evts=$(mktemp) + if [ -z "$client_evts" ]; then + client_evts=$(mktemp) + fi :>"$client_evts" + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid + fi ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & - local client_evts_pid=$! - local server_evts - server_evts=$(mktemp) + client_evts_pid=$! + if [ -z "$server_evts" ]; then + server_evts=$(mktemp) + fi :>"$server_evts" + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & - local server_evts_pid=$! + server_evts_pid=$! sleep 0.5 # Run the server @@ -159,7 +175,6 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill_wait $client_evts_pid local client_token local client_port @@ -171,12 +186,10 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill_wait $server_evts_pid server_token=$(grep "type:1," "$server_evts" | sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') server_serverside=$(grep "type:1," "$server_evts" | sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') - rm -f "$client_evts" "$server_evts" "$file" if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && [ "$server_serverside" = 1 ] @@ -240,13 +253,8 @@ verify_announce_event() test_announce() { - local evts - evts=$(mktemp) # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$server_evts" # ADD_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1)) @@ -254,7 +262,7 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t" if [ "$type" = "" ] then @@ -265,71 +273,63 @@ test_announce() fi # ADD_ADDR from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\ + verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" # ADD_ADDR6 from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" # ADD_ADDR from the client to server machine using a new port - :>"$evts" + :>"$server_evts" client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$client_evts" # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" # ADD_ADDR6 from the server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" # ADD_ADDR from the server to client machine using a new port - :>"$evts" + :>"$client_evts" server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - - kill_wait $evts_pid - rm -f "$evts" } verify_remove_event() @@ -357,14 +357,8 @@ verify_remove_event() test_remove() { - local evts - evts=$(mktemp) - # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$server_evts" # RM_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1 )) @@ -373,7 +367,7 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\ $client_addr_id local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -387,7 +381,7 @@ test_remove() $invalid_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\ $invalid_id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -396,40 +390,35 @@ test_remove() fi # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR6 from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - - kill_wait $evts_pid + verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$client_evts" # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ @@ -437,27 +426,24 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\ $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR from the server to client machine - :>"$evts" + :>"$client_evts" server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR6 from the server to client machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - - kill_wait $evts_pid - rm -f "$evts" + verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } verify_subflow_events() @@ -533,13 +519,8 @@ verify_subflow_events() test_subflows() { - local evts - evts=$(mktemp) # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # Attempt to add a listener at 10.0.2.2: ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ @@ -552,25 +533,25 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ rport "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine @@ -584,31 +565,31 @@ test_subflows() listener_pid=$! 
# ADD_ADDR6 from client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW6 from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ "$client_addr_id" "ns1" "ns2" @@ -623,44 +604,39 @@ test_subflows() listener_pid=$! # ADD_ADDR from client to server machine using a new port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id port $new4_port > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\ $new4_port token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ "10.0.2.1" "10.0.2.2" "$new4_port" "23"\ "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ $new4_port token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ "$client4_token" > /dev/null 2>&1 - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$client_evts" # Attempt to add a listener at 10.0.2.1: ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ @@ -673,24 +649,24 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine @@ -704,17 +680,17 @@ test_subflows() listener_pid=$! # ADD_ADDR6 from server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ "$AF_INET6" "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23"\ "$server_addr_id" "ns2" "ns1" @@ -722,14 +698,14 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\ + verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR6 from server to client machine @@ -743,38 +719,35 @@ test_subflows() listener_pid=$! 
# ADD_ADDR from server to client machine using a new port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id port $new4_port > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $new4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $new4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ "$server4_token" > /dev/null 2>&1 - - kill_wait $evts_pid - rm -f "$evts" } test_prio() -- cgit v1.2.3-58-ga151 From 6c73008aa301b7456b80d0e1416a240815fe947b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Nov 2022 15:06:31 +0100 Subject: selftests: mptcp: listener test for userspace PM This patch adds test coverage for listening sockets created by userspace processes. It adds a new test named test_listener() and a new verifying helper verify_listener_events(). 
The new output looks like this: CREATE_SUBFLOW 10.0.2.2 (ns2) => 10.0.2.1 (ns1) [OK] DESTROY_SUBFLOW 10.0.2.2 (ns2) => 10.0.2.1 (ns1) [OK] MP_PRIO TX [OK] MP_PRIO RX [OK] CREATE_LISTENER 10.0.2.2:37106 [OK] CLOSE_LISTENER 10.0.2.2:37106 [OK] Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 76 +++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 91d6f30e2fe4..a29deb9fa024 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -11,6 +11,8 @@ ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED REMOVED=7 # MPTCP_EVENT_REMOVED SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED AF_INET=2 AF_INET6=10 @@ -781,11 +783,85 @@ test_prio() fi } +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "CREATE_LISTENER %s:%s\t\t\t\t\t"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "CLOSE_LISTENER %s:%s\t\t\t\t\t"\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + if [ $type ] && [ $type = $e_type ] && + [ $family ] && [ $family = $e_family ] && + [ $saddr ] && [ $saddr = $e_saddr ] && + [ $sport ] && [ $sport = $e_sport ]; then + stdbuf -o0 -e0 printf "[OK]\n" + return 0 + fi + stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 +} + +test_listener() +{ + # Capture events on the network namespace running the client + :>$client_evts + + # Attempt to add a listener at 10.0.2.2: + ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\ + $client4_port > /dev/null 2>&1 & + local listener_pid=$! 
+ + verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port + + # ADD_ADDR from client to server machine reusing the subflow port + ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\ + $client_addr_id > /dev/null 2>&1 + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ + rport $client4_port token $server4_token > /dev/null 2>&1 + sleep 0.5 + + # Delete the listener from the client ns, if one was created + kill_wait $listener_pid + + verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port +} + make_connection make_connection "v6" test_announce test_remove test_subflows test_prio +test_listener exit 0 -- cgit v1.2.3-58-ga151 From a3735625572d8f9cac3eb12a43c869d97ccbf584 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Nov 2022 15:06:32 +0100 Subject: selftests: mptcp: make evts global in mptcp_join This patch moves evts_ns1 and evts_ns2 out of do_transfer() as two global variables in mptcp_join.sh. Init them in init() and remove them in cleanup(). Add a new helper reset_with_events() to save the outputs of 'pm_nl_ctl events' command in them. And a new helper kill_events_pids() to kill pids of 'pm_nl_ctl events' command. Use these helpers in userspace pm tests. Suggested-by: Paolo Abeni Signed-off-by: Geliang Tang Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 52 ++++++++++++++----------- 1 file changed, 29 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f10ef65a7009..32a3694c57fb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -26,6 +26,10 @@ ip_mptcp=0 check_invert=0 validate_checksum=0 init=0 +evts_ns1="" +evts_ns2="" +evts_ns1_pid=0 +evts_ns2_pid=0 declare -A all_tests declare -a only_tests_ids @@ -154,6 +158,8 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) trap cleanup EXIT @@ -165,6 +171,7 @@ cleanup() { rm -f "$cin" "$cout" "$sinfail" rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -rf $evts_ns1 $evts_ns2 cleanup_partial } @@ -320,6 +327,18 @@ reset_with_fail() index 100 || exit 1 } +reset_with_events() +{ + reset "${1}" || return 1 + + :> "$evts_ns1" + :> "$evts_ns2" + ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! +} + fail_test() { ret=1 @@ -473,6 +492,12 @@ kill_wait() wait $1 2>/dev/null } +kill_events_pids() +{ + kill_wait $evts_ns1_pid + kill_wait $evts_ns2_pid +} + pm_nl_set_limits() { local ns=$1 @@ -673,10 +698,6 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid local userspace_pm=0 - local evts_ns1 - local evts_ns1_pid - local evts_ns2 - local evts_ns2_pid :> "$cout" :> "$sout" @@ -753,17 +774,6 @@ do_transfer() addr_nr_ns2=${addr_nr_ns2:9} fi - if [ $userspace_pm -eq 1 ]; then - evts_ns1=$(mktemp) - evts_ns2=$(mktemp) - :> "$evts_ns1" - :> "$evts_ns2" - ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & - evts_ns1_pid=$! - ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & - evts_ns2_pid=$! 
- fi - local local_addr if is_v6 "${connect_addr}"; then local_addr="::" @@ -982,12 +992,6 @@ do_transfer() kill $cappid fi - if [ $userspace_pm -eq 1 ]; then - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid - rm -rf $evts_ns1 $evts_ns2 - fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -2961,22 +2965,24 @@ userspace_tests() fi # userspace pm add & remove address - if reset "userspace pm add & remove address"; then + if reset_with_events "userspace pm add & remove address"; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + kill_events_pids fi # userspace pm create destroy subflow - if reset "userspace pm create destroy subflow"; then + if reset_with_events "userspace pm create destroy subflow"; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow chk_join_nr 1 1 1 chk_rm_nr 0 1 + kill_events_pids fi } -- cgit v1.2.3-58-ga151 From 178d023208ebbc082de91d43a3b7c4c32a3c953f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Nov 2022 15:06:33 +0100 Subject: selftests: mptcp: listener test for in-kernel PM This patch adds test coverage for listening sockets created by the in-kernel path manager in mptcp_join.sh. It adds the listener event checking in the existing "remove single address with port" test. The output looks like this: 003 remove single address with port syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] - pt [ ok ] syn[ ok ] - synack[ ok ] - ack[ ok ] syn[ ok ] - ack [ ok ] rm [ ok ] - rmsf [ ok ] invert CREATE_LISTENER 10.0.2.1:10100[ ok ] CLOSE_LISTENER 10.0.2.1:10100 [ ok ] Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 58 ++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 32a3694c57fb..d11d3d566608 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2513,6 +2513,57 @@ backup_tests() fi } +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED + +AF_INET=2 +AF_INET6=10 + +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + if [ 
$type ] && [ $type = $e_type ] && + [ $family ] && [ $family = $e_family ] && + [ $saddr ] && [ $saddr = $e_saddr ] && + [ $sport ] && [ $sport = $e_sport ]; then + stdbuf -o0 -e0 printf "[ ok ]\n" + return 0 + fi + fail_test + stdbuf -o0 -e0 printf "[fail]\n" +} + add_addr_ports_tests() { # signal address with port @@ -2537,7 +2588,8 @@ add_addr_ports_tests() fi # single address with port, remove - if reset "remove single address with port"; then + # pm listener events + if reset_with_events "remove single address with port"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 @@ -2545,6 +2597,10 @@ add_addr_ports_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 1 chk_rm_nr 1 1 invert + + verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100 + kill_events_pids fi # subflow and signal with port, remove -- cgit v1.2.3-58-ga151 From 7d0455e97072b81c03b6062dbcc1403ebf9d9da5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 29 Nov 2022 17:48:15 +0100 Subject: selftests: Add a basic HSR test. This test adds a basic HSRv0 network with 3 nodes. In its current shape it sends and forwards packets, announcements and so merges nodes based on MAC A/B information. It is able to detect duplicate packets and packetloss should any occur. Cc: Shuah Khan Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/hsr/Makefile | 7 + tools/testing/selftests/net/hsr/config | 4 + tools/testing/selftests/net/hsr/hsr_ping.sh | 256 ++++++++++++++++++++++++++++ 4 files changed, 268 insertions(+) create mode 100644 tools/testing/selftests/net/hsr/Makefile create mode 100644 tools/testing/selftests/net/hsr/config create mode 100755 tools/testing/selftests/net/hsr/hsr_ping.sh (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f07aef7c592c..b57b091d8026 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -48,6 +48,7 @@ TARGETS += nci TARGETS += net TARGETS += net/af_unix TARGETS += net/forwarding +TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += netfilter diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile new file mode 100644 index 000000000000..92c1d9d080cd --- /dev/null +++ b/tools/testing/selftests/net/hsr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. 
+ +TEST_PROGS := hsr_ping.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config new file mode 100644 index 000000000000..22061204fb69 --- /dev/null +++ b/tools/testing/selftests/net/hsr/config @@ -0,0 +1,4 @@ +CONFIG_IPV6=y +CONFIG_NET_SCH_NETEM=m +CONFIG_HSR=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh new file mode 100755 index 000000000000..df9143538708 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +ksft_skip=4 +ipv6=true + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage $0 + exit 1 + ;; +esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" + +cleanup() +{ + local netns + for netns in "$ns1" "$ns2" "$ns3" ;do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +echo "INFO: preparing interfaces." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# +# Interfaces +ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" +ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" +ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + +# HSRv0. +ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 +ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 +ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + +# IP for HSR +ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 +ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad +ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 +ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad +ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 +ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + +# All Links up +ip -net "$ns1" link set ns1eth1 up +ip -net "$ns1" link set ns1eth2 up +ip -net "$ns1" link set hsr1 up + +ip -net "$ns2" link set ns2eth1 up +ip -net "$ns2" link set ns2eth2 up +ip -net "$ns2" link set hsr2 up + +ip -net "$ns3" link set ns3eth1 up +ip -net "$ns3" link set ns3eth2 up +ip -net "$ns3" link set hsr3 up + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? 
-ne 0 ] ; then
+		echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
+do_ping_long()
+{
+	local netns="$1"
+	local connect_addr="$2"
+	local ping_args="-q -c 10"
+
+	if is_v6 "${connect_addr}"; then
+		$ipv6 || return 0
+		ping_args="${ping_args} -6"
+	fi
+
+	OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+	if [ $? -ne 0 ] ; then
+		echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+		ret=1
+		return 1
+	fi
+
+	VAL="$(echo $OUT | cut -d' ' -f1-8)"
+	if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
+	then
+		echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+		echo "Expect to send and receive 10 packets and no duplicates."
+		echo "Full message: ${OUT}."
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
+stop_if_error()
+{
+	local msg="$1"
+
+	if [ ${ret} -ne 0 ]; then
+		echo "FAIL: ${msg}" 1>&2
+		exit ${ret}
+	fi
+}
+
+
+echo "INFO: Initial validation ping."
+# Each node has to be able to reach each of the other nodes.
+do_ping "$ns1" 100.64.0.2
+do_ping "$ns2" 100.64.0.1
+do_ping "$ns3" 100.64.0.1
+stop_if_error "Initial validation failed."
+
+do_ping "$ns1" 100.64.0.3
+do_ping "$ns2" 100.64.0.3
+do_ping "$ns3" 100.64.0.2
+
+do_ping "$ns1" dead:beef:1::2
+do_ping "$ns1" dead:beef:1::3
+do_ping "$ns2" dead:beef:1::1
+do_ping "$ns2" dead:beef:1::2
+do_ping "$ns3" dead:beef:1::1
+do_ping "$ns3" dead:beef:1::2
+
+stop_if_error "Initial validation failed."
+
+# Wait until all supervision frames have been processed and the node
+# entries have been merged. Otherwise duplicate frames will be observed,
+# which is valid at this stage.
+WAIT=5
+while [ ${WAIT} -gt 0 ]
+do
+	grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table
+	if [ $? -ne 0 ]
+	then
+		break
+	fi
+	sleep 1
+	let "WAIT = WAIT - 1"
+done
+
+# Just a safety delay in case the above check didn't handle it.
+sleep 1
+
+echo "INFO: Longer ping test."
+do_ping_long "$ns1" 100.64.0.2
+do_ping_long "$ns1" dead:beef:1::2
+do_ping_long "$ns1" 100.64.0.3
+do_ping_long "$ns1" dead:beef:1::3
+
+stop_if_error "Longer ping test failed."
+
+do_ping_long "$ns2" 100.64.0.1
+do_ping_long "$ns2" dead:beef:1::1
+do_ping_long "$ns2" 100.64.0.3
+do_ping_long "$ns2" dead:beef:1::2
+stop_if_error "Longer ping test failed."
+
+do_ping_long "$ns3" 100.64.0.1
+do_ping_long "$ns3" dead:beef:1::1
+do_ping_long "$ns3" 100.64.0.2
+do_ping_long "$ns3" dead:beef:1::2
+stop_if_error "Longer ping test failed."
+
+echo "INFO: Cutting one link."
+do_ping_long "$ns1" 100.64.0.3 &
+
+sleep 3
+ip -net "$ns3" link set ns3eth1 down
+wait
+
+ip -net "$ns3" link set ns3eth1 up
+
+stop_if_error "Failed with one link down."
+
+echo "INFO: Delay the link and drop a few packets."
+tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
+tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
+
+do_ping_long "$ns1" 100.64.0.2
+do_ping_long "$ns1" 100.64.0.3
+
+stop_if_error "Failed with delay and packetloss."
+
+do_ping_long "$ns2" 100.64.0.1
+do_ping_long "$ns2" 100.64.0.3
+
+stop_if_error "Failed with delay and packetloss."
+
+do_ping_long "$ns3" 100.64.0.1
+do_ping_long "$ns3" 100.64.0.2
+stop_if_error "Failed with delay and packetloss."
+
+echo "INFO: All good."
+exit $ret
-- 
cgit v1.2.3-58-ga151


From f16a7aa5c2be3134d3b72078b94f36d639552888 Mon Sep 17 00:00:00 2001
From: James Hilliard
Date: Thu, 1 Dec 2022 12:09:39 -0700
Subject: selftests/bpf: Add GCC compatible builtins to bpf_legacy.h

The bpf_legacy.h header uses LLVM-specific load functions; add GCC-compatible variants as well to fix the tests that use these functions under GCC.

Signed-off-by: James Hilliard
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20221201190939.3230513-1-james.hilliard1@gmail.com
---
 tools/testing/selftests/bpf/bpf_legacy.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)
(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 845209581440..bc4555a003a7 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,15 +2,22 @@
 #ifndef __BPF_LEGACY__
 #define __BPF_LEGACY__
 
+#if __GNUC__ && !__clang__
+/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We
+ * provide the "standard" names as synonyms of the corresponding GCC
+ * builtins. Note how the SKB argument is ignored.
+ */
+#define load_byte(skb, off) __builtin_bpf_load_byte(off)
+#define load_half(skb, off) __builtin_bpf_load_half(off)
+#define load_word(skb, off) __builtin_bpf_load_word(off)
+#else
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
  */
-unsigned long long load_byte(void *skb,
-			     unsigned long long off) asm("llvm.bpf.load.byte");
-unsigned long long load_half(void *skb,
-			     unsigned long long off) asm("llvm.bpf.load.half");
-unsigned long long load_word(void *skb,
-			     unsigned long long off) asm("llvm.bpf.load.word");
+unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word");
+#endif
 
 #endif
-- 
cgit v1.2.3-58-ga151


From 706819495921ddad6b3780140b9d9e9293b6dedc Mon Sep 17 00:00:00 2001
From: Xin Liu
Date: Fri, 2 Dec 2022 16:17:38 +0800
Subject: libbpf: Improve usability of libbpf Makefile

The current libbpf Makefile does not provide a help target, which makes it inconvenient to use. Similar to the help target in perf's Makefile, add one that lists the targets supported by the libbpf Makefile and what each of them does.

Signed-off-by: Xin Liu
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20221202081738.128513-1-liuxin350@huawei.com
---
 tools/lib/bpf/Makefile | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4c904ef0b47e..477666f3d496 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -286,3 +286,20 @@ tags:
 
 # Delete partially updated (corrupted) files on error
 .DELETE_ON_ERROR:
+
+help:
+	@echo 'libbpf common targets:'
+	@echo '  HINT: use "V=1" to enable verbose build'
+	@echo '  all     - build libraries and pkgconfig'
+	@echo '  clean   - remove all generated files'
+	@echo '  check   - check abi and version info'
+	@echo ''
+	@echo 'libbpf install targets:'
+	@echo '  HINT: use "prefix" (defaults to "/usr/local") or "DESTDIR" (defaults to "/")'
+	@echo '        to adjust target destination, e.g.
"make prefix=/usr/local install"' + @echo ' install - build and install all headers, libraries and pkgconfig' + @echo ' install_headers - install only headers to include/bpf' + @echo '' + @echo 'libbpf make targets:' + @echo ' tags - use ctags to make tag information for source code browsing' + @echo ' cscope - use cscope to make interactive source code browsing database' -- cgit v1.2.3-58-ga151 From 8723ec22a31db3f400fce440c235ada0fff95657 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 3 Dec 2022 10:46:07 -0800 Subject: selftests/bpf: Fix rcu_read_lock test with new MEM_RCU semantics Add MEM_RCU pointer null checking for related tests. Also modified task_acquire test so it takes a rcu ptr 'ptr' where 'ptr = rcu_ptr->rcu_field'. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221203184607.478314-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/rcu_read_lock.c | 55 ++++++++++++++++++----- 1 file changed, 45 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 94a970076b98..cf06a34fcb02 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -23,13 +23,14 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; -struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym; void bpf_task_release(struct task_struct *p) __ksym; SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int get_cgroup_id(void *ctx) { struct task_struct *task; + struct css_set *cgroups; task = bpf_get_current_task_btf(); if (task->pid != target_pid) @@ -37,7 +38,11 @@ int get_cgroup_id(void *ctx) /* simulate bpf_get_current_cgroup_id() helper */ bpf_rcu_read_lock(); - cgroup_id = task->cgroups->dfl_cgrp->kn->id; + cgroups = task->cgroups; + if (!cgroups) + goto unlock; + cgroup_id = cgroups->dfl_cgrp->kn->id; +unlock: bpf_rcu_read_unlock(); return 0; } @@ -56,6 +61,8 @@ int task_succ(void *ctx) bpf_rcu_read_lock(); /* region including helper using rcu ptr real_parent */ real_parent = task->real_parent; + if (!real_parent) + goto out; ptr = bpf_task_storage_get(&map_a, real_parent, &init_val, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) @@ -92,7 +99,10 @@ int two_regions(void *ctx) bpf_rcu_read_unlock(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -105,7 +115,10 @@ int non_sleepable_1(void *ctx) task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -121,7 +134,10 @@ int non_sleepable_2(void *ctx) bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -129,16 +145,28 @@ int non_sleepable_2(void *ctx) SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") int task_acquire(void *ctx) { - struct task_struct *task, *real_parent; + struct task_struct *task, *real_parent, *gparent; task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); real_parent = 
task->real_parent; + if (!real_parent) + goto out; + + /* rcu_ptr->rcu_field */ + gparent = real_parent->real_parent; + if (!gparent) + goto out; + /* acquire a reference which can be used outside rcu read lock region */ - real_parent = bpf_task_acquire(real_parent); + gparent = bpf_task_acquire_not_zero(gparent); + if (!gparent) + goto out; + + (void)bpf_task_storage_get(&map_a, gparent, 0, 0); + bpf_task_release(gparent); +out: bpf_rcu_read_unlock(); - (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); - bpf_task_release(real_parent); return 0; } @@ -181,9 +209,12 @@ int non_sleepable_rcu_mismatch(void *ctx) /* non-sleepable: missing bpf_rcu_read_unlock() in one path */ bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); if (real_parent) bpf_rcu_read_unlock(); +out: return 0; } @@ -199,16 +230,17 @@ int inproper_sleepable_helper(void *ctx) /* sleepable helper in rcu read lock region */ bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; regs = (struct pt_regs *)bpf_task_pt_regs(real_parent); - if (!regs) { - bpf_rcu_read_unlock(); - return 0; - } + if (!regs) + goto out; ptr = (void *)PT_REGS_IP(regs); (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0); user_data = value; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -239,7 +271,10 @@ int nested_rcu_region(void *ctx) bpf_rcu_read_lock(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); bpf_rcu_read_unlock(); return 0; -- cgit v1.2.3-58-ga151
From c0c852dd1876dc1db4600ce951a92aadd3073b1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 3 Dec 2022 12:49:54 -0800 Subject: bpf: Do not mark certain LSM hook arguments as trusted Martin mentioned that the verifier cannot assume that arguments of the LSM hook sk_alloc_security are trusted, since after the hook is called the sk ref_count is set to 1. This will overwrite the ref_count changed by the bpf program and may cause ref_count underflow later on. I then further checked some other hooks. For example, for the bpf_lsm_file_alloc() hook in fs/file_table.c, f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { file_free_rcu(&f->f_rcuhead); return ERR_PTR(error); } atomic_long_set(&f->f_count, 1); The input parameter 'f' to security_file_alloc() cannot be trusted either. Specifically, I investigated the bpf_map/bpf_prog/file/sk/task alloc/free lsm hooks. Except for bpf_map_alloc and task_alloc, arguments for all other hooks should not be considered trusted. This may not be a complete list, but it covers common usage for sk and task.
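The sk case follows the same shape: in net/core/sock.c the hook is invoked while the socket is still being initialized, and the refcount is only set afterwards. A simplified sketch of that path (condensed for illustration, not the exact kernel code):

	/* sk_prot_alloc(): */
	sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
	if (!sk)
		return sk;
	...
	if (security_sk_alloc(sk, family, priority))
		goto out_free;

	/* and only later, in sock_init_data(): */
	refcount_set(&sk->sk_refcnt, 1);

So any reference taken by a BPF program attached to bpf_lsm_sk_alloc_security is wiped out by the subsequent refcount_set(), which is why its 'sk' argument must not be marked trusted.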
Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs") Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221203204954.2043348-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_lsm.h | 6 ++++++ kernel/bpf/bpf_lsm.c | 16 ++++++++++++++++ kernel/bpf/btf.c | 2 ++ tools/testing/selftests/bpf/prog_tests/task_kfunc.c | 1 + tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 11 +++++++++++ 5 files changed, 36 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 4bcf76a9bb06..1de7ece5d36d 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -28,6 +28,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); bool bpf_lsm_is_sleepable_hook(u32 btf_id); +bool bpf_lsm_is_trusted(const struct bpf_prog *prog); static inline struct bpf_storage_blob *bpf_inode( const struct inode *inode) @@ -51,6 +52,11 @@ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) return false; } +static inline bool bpf_lsm_is_trusted(const struct bpf_prog *prog) +{ + return false; +} + static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index ae0267f150b5..9ea42a45da47 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -345,11 +345,27 @@ BTF_ID(func, bpf_lsm_task_to_inode) BTF_ID(func, bpf_lsm_userns_create) BTF_SET_END(sleepable_lsm_hooks) +BTF_SET_START(untrusted_lsm_hooks) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) +BTF_ID(func, bpf_lsm_bpf_prog_free_security) +BTF_ID(func, bpf_lsm_file_alloc_security) +BTF_ID(func, bpf_lsm_file_free_security) +BTF_ID(func, bpf_lsm_sk_alloc_security) +BTF_ID(func, bpf_lsm_sk_free_security) +BTF_ID(func, bpf_lsm_task_free) +BTF_SET_END(untrusted_lsm_hooks) + bool bpf_lsm_is_sleepable_hook(u32 btf_id) { return btf_id_set_contains(&sleepable_lsm_hooks, btf_id); } +bool bpf_lsm_is_trusted(const struct bpf_prog *prog) +{ + return !btf_id_set_contains(&untrusted_lsm_hooks, prog->aux->attach_btf_id); +} + const struct bpf_prog_ops lsm_prog_ops = { }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d11cbf8cece7..c80bd8709e69 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -5829,6 +5830,7 @@ static bool prog_args_trusted(const struct bpf_prog *prog) case BPF_PROG_TYPE_TRACING: return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER; case BPF_PROG_TYPE_LSM: + return bpf_lsm_is_trusted(prog); case BPF_PROG_TYPE_STRUCT_OPS: return true; default: diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index ffd8ef4303c8..18848c31e36f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -103,6 +103,7 @@ static struct { {"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"}, {"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_from_lsm_task_free", "reg type unsupported for arg#0 function"}, }; static void verify_fail(const char *prog_name, const char *expected_err_msg) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c 
b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index e310473190d5..87fa1db9d9b5 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -271,3 +271,14 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl return 0; } + +SEC("lsm/task_free") +int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) +{ + struct task_struct *acquired; + + /* the argument of lsm task_free hook is untrusted. */ + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + return 0; +} -- cgit v1.2.3-58-ga151 From 41d76c721c5c743470078d30e9bb8df08c489b1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 30 Nov 2022 21:04:49 -0800 Subject: bpf: Add sleepable prog tests for cgrp local storage Add three tests for cgrp local storage support for sleepable progs. Two tests can load and run properly, one for cgroup_iter, another for passing current->cgroups->dfl_cgrp to bpf_cgrp_storage_get() helper. One test has bpf_rcu_read_lock() and failed to load. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221201050449.2785613-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cgrp_local_storage.c | 94 ++++++++++++++++++++++ .../selftests/bpf/progs/cgrp_ls_sleepable.c | 80 ++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 1c30412ba132..33a2776737e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -10,7 +10,9 @@ #include "cgrp_ls_recursion.skel.h" #include "cgrp_ls_attach_cgroup.skel.h" #include "cgrp_ls_negative.skel.h" +#include "cgrp_ls_sleepable.skel.h" #include "network_helpers.h" +#include "cgroup_helpers.h" struct socket_cookie { __u64 cookie_key; @@ -150,14 +152,100 @@ static void test_negative(void) } } +static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct cgrp_ls_sleepable *skel; + struct bpf_link *link; + int err, iter_fd; + char buf[16]; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.cgroup_iter, true); + err = cgrp_ls_sleepable__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgroup_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto out; + + /* trigger the program run */ + (void)read(iter_fd, buf, sizeof(buf)); + + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); + + close(iter_fd); +out: + cgrp_ls_sleepable__destroy(skel); +} + +static void test_no_rcu_lock(__u64 cgroup_id) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); + err = 
cgrp_ls_sleepable__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = cgrp_ls_sleepable__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + cgrp_ls_sleepable__destroy(skel); +} + +static void test_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_ERR(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + void test_cgrp_local_storage(void) { + __u64 cgroup_id; int cgroup_fd; cgroup_fd = test__join_cgroup("/cgrp_local_storage"); if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) return; + cgroup_id = get_cgroup_id("/cgrp_local_storage"); if (test__start_subtest("tp_btf")) test_tp_btf(cgroup_fd); if (test__start_subtest("attach_cgroup")) @@ -166,6 +254,12 @@ void test_cgrp_local_storage(void) test_recursion(cgroup_fd); if (test__start_subtest("negative")) test_negative(); + if (test__start_subtest("cgroup_iter_sleepable")) + test_cgroup_iter_sleepable(cgroup_fd, cgroup_id); + if (test__start_subtest("no_rcu_lock")) + test_no_rcu_lock(cgroup_id); + if (test__start_subtest("rcu_lock")) + test_rcu_lock(); close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c new file mode 100644 index 000000000000..2d11ed528b6f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "bpf_iter.h" +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 target_pid; +__u64 cgroup_id; + +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +SEC("?iter.s/cgroup") +int cgroup_iter(struct bpf_iter__cgroup *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct cgroup *cgrp = ctx->cgroup; + long *ptr; + + if (cgrp == NULL) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; + struct cgroup *cgrp; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* ptr_to_btf_id semantics. should work. */ + cgrp = task->cgroups->dfl_cgrp; + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int yes_rcu_lock(void *ctx) +{ + struct task_struct *task; + struct cgroup *cgrp; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = task->cgroups->dfl_cgrp; + /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. 
*/ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_rcu_read_unlock(); + return 0; +} -- cgit v1.2.3-58-ga151
From ab0350c743d5c93fd88742f02b3dff12168ab435 Mon Sep 17 00:00:00 2001 From: James Hilliard Date: Fri, 2 Dec 2022 18:08:44 -0700 Subject: selftests/bpf: Fix conflicts with built-in functions in bpf_iter_ksym Both tolower and toupper are built-in C functions; we should not redefine them, as this can result in a build error. Fixes the following errors: progs/bpf_iter_ksym.c:10:20: error: conflicting types for built-in function 'tolower'; expected 'int(int)' [-Werror=builtin-declaration-mismatch] 10 | static inline char tolower(char c) | ^~~~~~~ progs/bpf_iter_ksym.c:5:1: note: 'tolower' is declared in header '<ctype.h>' 4 | #include <bpf/bpf_helpers.h> +++ |+#include <ctype.h> 5 | progs/bpf_iter_ksym.c:17:20: error: conflicting types for built-in function 'toupper'; expected 'int(int)' [-Werror=builtin-declaration-mismatch] 17 | static inline char toupper(char c) | ^~~~~~~ progs/bpf_iter_ksym.c:17:20: note: 'toupper' is declared in header '<ctype.h>' See background on this sort of issue: https://stackoverflow.com/a/20582607 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=12213 (C99, 7.1.3p1) "All identifiers with external linkage in any of the following subclauses (including the future library directions) are always reserved for use as identifiers with external linkage." This is documented behavior in GCC: https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#index-std-2 Signed-off-by: James Hilliard Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221203010847.2191265-1-james.hilliard1@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_iter_ksym.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 285c008cbf9c..9ba14c37bbcc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -7,14 +7,14 @@ char _license[] SEC("license") = "GPL"; unsigned long last_sym_value = 0; -static inline char tolower(char c) +static inline char to_lower(char c) { if (c >= 'A' && c <= 'Z') c += ('a' - 'A'); return c; } -static inline char toupper(char c) +static inline char to_upper(char c) { if (c >= 'a' && c <= 'z') c -= ('a' - 'A'); @@ -54,7 +54,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx) type = iter->type; if (iter->module_name[0]) { - type = iter->exported ? toupper(type) : tolower(type); + type = iter->exported ? to_upper(type) : to_lower(type); BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", value, type, iter->name, iter->module_name); } else { -- cgit v1.2.3-58-ga151
From 4f4ac4d9106efeec9c84469725c04c4237c7fb6c Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 3 Dec 2022 10:46:58 +0200 Subject: tools: add IFLA_XFRM_COLLECT_METADATA to uapi/linux/if_link.h Needed for XFRM metadata tests.
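With this attribute an xfrm interface can be created in collect_md (metadata) mode over netlink; sufficiently recent iproute2 exposes the same thing as, roughly, "ip link add ipsec0 type xfrm external". The test added in the next commit builds the RTM_NEWLINK request by hand precisely so that it does not depend on such a new iproute2.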
Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20221203084659.1837829-4-eyal.birger@gmail.com Signed-off-by: Martin KaFai Lau --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 0242f31e339c..901d98b865a1 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -673,6 +673,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; -- cgit v1.2.3-58-ga151 From 90a3a05eb33ff7a263f1e93e444d40e427cddf1a Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 3 Dec 2022 10:46:59 +0200 Subject: selftests/bpf: add xfrm_info tests Test the xfrm_info kfunc helpers. The test setup creates three name spaces - NS0, NS1, NS2. XFRM tunnels are setup between NS0 and the two other NSs. The kfunc helpers are used to steer traffic from NS0 to the other NSs based on a userspace populated bpf global variable and validate that the return traffic had arrived from the desired NS. Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20221203084659.1837829-5-eyal.birger@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/DENYLIST.s390x | 1 + tools/testing/selftests/bpf/config | 2 + tools/testing/selftests/bpf/prog_tests/xfrm_info.c | 362 +++++++++++++++++++++ .../testing/selftests/bpf/progs/bpf_tracing_net.h | 3 + tools/testing/selftests/bpf/progs/xfrm_info.c | 35 ++ 5 files changed, 403 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xfrm_info.c create mode 100644 tools/testing/selftests/bpf/progs/xfrm_info.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 3481f3a5ea6f..585fcf73c731 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -85,3 +85,4 @@ xdp_bonding # failed to auto-attach program 'trace_ xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) xdp_synproxy # JIT does not support calling kernel function (kfunc) +xfrm_info # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f9034ea00bc9..3543c76cef56 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,6 +23,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IMA=y CONFIG_IMA_READ_POLICY=y CONFIG_IMA_WRITE_POLICY=y +CONFIG_INET_ESP=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_RAW=y CONFIG_IP_NF_TARGET_SYNPROXY=y @@ -74,3 +75,4 @@ CONFIG_TEST_BPF=y CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y +CONFIG_XFRM_INTERFACE=y diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c new file mode 100644 index 000000000000..8b03c9bb4862 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Topology: + * --------- + * NS0 namespace | NS1 namespace | NS2 namespace + * | | + * +---------------+ | +---------------+ | + * | ipsec0 |---------| ipsec0 | | + * | 192.168.1.100 | | | 192.168.1.200 | | + * | if_id: bpf | | +---------------+ | + * +---------------+ | | + * | | | +---------------+ + * | | | | ipsec0 | + * \------------------------------------------| 192.168.1.200 | + * | | 
+---------------+ + * | | + * | | (overlay network) + * ------------------------------------------------------ + * | | (underlay network) + * +--------------+ | +--------------+ | + * | veth01 |----------| veth10 | | + * | 172.16.1.100 | | | 172.16.1.200 | | + * ---------------+ | +--------------+ | + * | | + * +--------------+ | | +--------------+ + * | veth02 |-----------------------------------| veth20 | + * | 172.16.2.100 | | | | 172.16.2.200 | + * +--------------+ | | +--------------+ + * + * + * Test Packet flow + * ----------- + * The tests perform 'ping 192.168.1.200' from the NS0 namespace: + * 1) request is routed to NS0 ipsec0 + * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based + * on the requested value. This makes the ipsec0 device in external mode + * select the destination tunnel + * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was + * used) and response is sent + * 4) response is received on NS0 ipsec0, tc ingress program is triggered and + * records the response if_id + * 5) requested if_id is compared with received if_id + */ + +#include +#include +#include + +#include "test_progs.h" +#include "network_helpers.h" +#include "xfrm_info.skel.h" + +#define NS0 "xfrm_test_ns0" +#define NS1 "xfrm_test_ns1" +#define NS2 "xfrm_test_ns2" + +#define IF_ID_0_TO_1 1 +#define IF_ID_0_TO_2 2 +#define IF_ID_1 3 +#define IF_ID_2 4 + +#define IP4_ADDR_VETH01 "172.16.1.100" +#define IP4_ADDR_VETH10 "172.16.1.200" +#define IP4_ADDR_VETH02 "172.16.2.100" +#define IP4_ADDR_VETH20 "172.16.2.200" + +#define ESP_DUMMY_PARAMS \ + "proto esp aead 'rfc4106(gcm(aes))' " \ + "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define SYS_NOFAIL(fmt, ...) 
\ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + +static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{ + LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, + .prog_fd = igr_fd); + LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1, + .prog_fd = egr_fd); + int ret; + + ret = bpf_tc_hook_create(hook); + if (!ASSERT_OK(ret, "create tc hook")) + return ret; + + if (igr_fd >= 0) { + hook->attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(hook, &opts1); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + if (egr_fd >= 0) { + hook->attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(hook, &opts2); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + return 0; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1); + SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2); +} + +static int config_underlay(void) +{ + SYS("ip netns add " NS0); + SYS("ip netns add " NS1); + SYS("ip netns add " NS2); + + /* NS0 <-> NS1 [veth01 <-> veth10] */ + SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS("ip -net " NS0 " link set dev veth01 up"); + SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS("ip -net " NS1 " link set dev veth10 up"); + + /* NS0 <-> NS2 [veth02 <-> veth20] */ + SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS("ip -net " NS0 " link set dev veth02 up"); + SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS("ip -net " NS2 " link set dev veth20 up"); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, + const char *ipv4_remote, int if_id) +{ + /* State: local -> remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); + + /* State: local <- remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); + + /* Policy: local -> remote */ + SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_local, ipv4_remote, if_id); + + /* Policy: local <- remote */ + SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_remote, ipv4_local, if_id); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b, + const char *ipv4_a, const char *ipv4_b, + int if_id_a, int if_id_b) +{ + return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) || + setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b); +} + +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type, + unsigned short len) +{ + struct rtattr *rta = + (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + return rta; +} + +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short 
type, + const char *s) +{ + struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + + memcpy(RTA_DATA(rta), s, strlen(s)); + return rta; +} + +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type) +{ + return rtattr_add(nh, type, 0); +} + +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + uint8_t *end = (uint8_t *)nh + nh->nlmsg_len; + + attr->rta_len = end - (uint8_t *)attr; +} + +static int setup_xfrmi_external_dev(const char *ns) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + unsigned char data[128]; + } req; + struct rtattr *link_info, *info_data; + struct nstoken *nstoken; + int ret = -1, sock = -1; + struct nlmsghdr *nh; + + memset(&req, 0, sizeof(req)); + nh = &req.nh; + nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + nh->nlmsg_type = RTM_NEWLINK; + nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST; + + rtattr_add_str(nh, IFLA_IFNAME, "ipsec0"); + link_info = rtattr_begin(nh, IFLA_LINKINFO); + rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm"); + info_data = rtattr_begin(nh, IFLA_INFO_DATA); + rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0); + rtattr_end(nh, info_data); + rtattr_end(nh, link_info); + + nstoken = open_netns(ns); + if (!ASSERT_OK_PTR(nstoken, "setns")) + goto done; + + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); + if (!ASSERT_GE(sock, 0, "netlink socket")) + goto done; + ret = send(sock, nh, nh->nlmsg_len, 0); + if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length")) + goto done; + + ret = 0; +done: + if (sock != -1) + close(sock); + if (nstoken) + close_netns(nstoken); + return ret; +} + +static int config_overlay(void) +{ + if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10, + IF_ID_0_TO_1, IF_ID_1)) + goto fail; + if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20, + IF_ID_0_TO_2, IF_ID_2)) + goto fail; + + /* Older iproute2 doesn't support this option */ + if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) + goto fail; + + SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS("ip -net " NS0 " link set dev ipsec0 up"); + + SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS1 " link set dev ipsec0 up"); + + SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS2 " link set dev ipsec0 up"); + + return 0; +fail: + return -1; +} + +static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) +{ + skel->bss->req_if_id = if_id; + + SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); + + if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) + goto fail; + + return 0; +fail: + return -1; +} + +static void _test_xfrm_info(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd; + struct nstoken *nstoken = NULL; + struct xfrm_info *skel; + int ifindex; + + /* load and attach bpf progs to ipsec dev tc hook point */ + skel = xfrm_info__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load")) + goto done; + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto done; + ifindex = if_nametoindex("ipsec0"); + if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info); + get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info); + if 
(!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd, + set_xfrm_info_prog_fd)) + goto done; + + /* perform test */ + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1)) + goto done; + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2)) + goto done; + +done: + if (nstoken) + close_netns(nstoken); + xfrm_info__destroy(skel); +} + +void test_xfrm_info(void) +{ + cleanup(); + + if (!ASSERT_OK(config_underlay(), "config_underlay")) + goto done; + if (!ASSERT_OK(config_overlay(), "config_overlay")) + goto done; + + if (test__start_subtest("xfrm_info")) + _test_xfrm_info(); + +done: + cleanup(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index adb087aecc9e..b394817126cf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -25,6 +25,9 @@ #define IPV6_TCLASS 67 #define IPV6_AUTOFLOWLABEL 70 +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_SHOT 2 + #define SOL_TCP 6 #define TCP_NODELAY 1 #define TCP_MAXSEG 2 diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c new file mode 100644 index 000000000000..3acedcdd962d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include + +__u32 req_if_id; +__u32 resp_if_id; + +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, + const struct bpf_xfrm_info *from) __ksym; +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, + struct bpf_xfrm_info *to) __ksym; + +SEC("tc") +int set_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info info = { .if_id = req_if_id }; + + return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC; +} + +SEC("tc") +int get_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info info = {}; + + if (bpf_skb_get_xfrm_info(skb, &info) < 0) + return TC_ACT_SHOT; + + resp_if_id = info.if_id; + + return TC_ACT_UNSPEC; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-58-ga151 From fa55ef14ef4fe06198c0ce811b603aec24134bc2 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 6 Dec 2022 11:19:06 +0400 Subject: bpftool: Fix memory leak in do_build_table_cb strdup() allocates memory for path. We need to release the memory in the following error path. Add free() to avoid memory leak. 
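The surrounding logic follows the common strdup()-then-insert pattern, where ownership of the string only transfers to the table on success. A minimal sketch of the pattern (hypothetical helper names, not the actual bpftool code):

	#include <stdlib.h>
	#include <string.h>

	int table_insert(void *table, unsigned int id, char *path); /* hypothetical */

	static int add_pinned_path(void *table, unsigned int id, const char *fpath)
	{
		char *path = strdup(fpath);

		if (!path)
			return -1;
		if (table_insert(table, id, path)) {
			free(path); /* ownership did not transfer on failure */
			return -1;
		}
		return 0; /* the table now owns 'path' and frees it on teardown */
	}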
Fixes: 8f184732b60b ("bpftool: Switch to libbpf's hashmap for pinned paths of BPF objects") Signed-off-by: Miaoqian Lin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221206071906.806384-1-linmq006@gmail.com --- tools/bpf/bpftool/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c90b756945e3..620032042576 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -501,6 +501,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (err) { p_err("failed to append entry to hashmap for ID %u, path '%s': %s", pinned_info.id, path, strerror(errno)); + free(path); goto out_close; } -- cgit v1.2.3-58-ga151
From aa67961f3243dfff26c47769f87b4d94b07ec71f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 6 Dec 2022 11:35:54 -0800 Subject: selftests/bpf: Allow building bpf tests with CONFIG_XFRM_INTERFACE=[m|n] It is useful to use vmlinux.h in the xfrm_info test like other kfunc tests do. In particular, it is common for a kfunc bpf prog to require other core kernel structures from vmlinux.h. Although vmlinux.h is preferred, it needs a ___local flavor of struct bpf_xfrm_info in order to build the bpf selftests when CONFIG_XFRM_INTERFACE=[m|n]. Cc: Eyal Birger Fixes: 90a3a05eb33f ("selftests/bpf: add xfrm_info tests") Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221206193554.1059757-1-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xfrm_info.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c index 3acedcdd962d..f6a501fbba2b 100644 --- a/tools/testing/selftests/bpf/progs/xfrm_info.c +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -3,18 +3,23 @@ #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> +struct bpf_xfrm_info___local { + u32 if_id; + int link; +} __attribute__((preserve_access_index)); + __u32 req_if_id; __u32 resp_if_id; int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, - const struct bpf_xfrm_info *from) __ksym; + const struct bpf_xfrm_info___local *from) __ksym; int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, - struct bpf_xfrm_info *to) __ksym; + struct bpf_xfrm_info___local *to) __ksym; SEC("tc") int set_xfrm_info(struct __sk_buff *skb) { - struct bpf_xfrm_info info = { .if_id = req_if_id }; + struct bpf_xfrm_info___local info = { .if_id = req_if_id }; return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC; } @@ -22,7 +27,7 @@ int set_xfrm_info(struct __sk_buff *skb) SEC("tc") int get_xfrm_info(struct __sk_buff *skb) { - struct bpf_xfrm_info info = {}; + struct bpf_xfrm_info___local info = {}; if (bpf_skb_get_xfrm_info(skb, &info) < 0) return TC_ACT_SHOT; -- cgit v1.2.3-58-ga151
From c21dc529baba16d6698a396eb997fee318300ee1 Mon Sep 17 00:00:00 2001 From: Timo Hunziker Date: Sat, 3 Dec 2022 12:37:46 +0000 Subject: libbpf: Parse usdt args without offset on x86 (e.g. 8@(%rsp)) Parse USDT arguments like "8@(%rsp)" on x86. These are emitted by SystemTap. The argument syntax is similar to the existing "memory dereference case", but with the offset left out, as it's zero (i.e. read the value from the address in the register). We treat it the same as the "memory dereference case", but set the offset to 0. I've tested that this fixes the "unrecognized arg #N spec: 8@(%rsp).."
error I've run into when attaching to a probe with such an argument. Attaching and reading the correct argument values works. Something similar might be needed for the other supported architectures. [0] Closes: https://github.com/libbpf/libbpf/issues/559 Signed-off-by: Timo Hunziker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221203123746.2160-1-timo.hunziker@eclipso.ch --- tools/lib/bpf/usdt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index b8daae265f99..75b411fc2c77 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1233,6 +1233,14 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec if (reg_off < 0) return reg_off; arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) { + /* Memory dereference case without offset, e.g., 8@(%rsp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; } else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; -- cgit v1.2.3-58-ga151 From d68ae4982cb773f3d738b5dc25f77f5c7550548a Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 5 Dec 2022 14:16:16 +0100 Subject: selftests/bpf: Install all required files to run selftests When installing the selftests using "make -C tools/testing/selftests install", we need to make sure all the required files to run the selftests are installed. Let's make sure this is the case. Signed-off-by: Daan De Meyer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221205131618.1524337-2-daan.j.demeyer@gmail.com --- tools/testing/selftests/bpf/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6a0f043dc410..997b3bd10cbf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -532,8 +532,10 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ - ima_setup.sh verify_sig_setup.sh \ - $(wildcard progs/btf_dump_test_case_*.c) + ima_setup.sh \ + verify_sig_setup.sh \ + $(wildcard progs/btf_dump_test_case_*.c) \ + $(wildcard progs/*.bpf.o) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) -- cgit v1.2.3-58-ga151 From efe7fadbd59ec4513272c722403226e47321311b Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 5 Dec 2022 14:16:17 +0100 Subject: selftests/bpf: Use "is not set" instead of "=n" "=n" is not valid kconfig syntax. Use "is not set" instead to indicate the option should be disabled. 
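For reference, the comment form below is what the kconfig tooling itself emits and what fragment-merging helpers such as scripts/kconfig/merge_config.sh recognize:

	# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set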
Signed-off-by: Daan De Meyer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221205131618.1524337-3-daan.j.demeyer@gmail.com --- tools/testing/selftests/bpf/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3543c76cef56..be892ea76c32 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -8,7 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_SYSCALL=y -CONFIG_BPF_UNPRIV_DEFAULT_OFF=n +# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y -- cgit v1.2.3-58-ga151
From d0c0b48c87274b7735f8c930a7c0d783fb46cfe9 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 5 Dec 2022 14:16:18 +0100 Subject: selftests/bpf: Use CONFIG_TEST_BPF=m instead of CONFIG_TEST_BPF=y CONFIG_TEST_BPF can only be a module, so let's indicate it as such in the selftests config. Signed-off-by: Daan De Meyer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221205131618.1524337-4-daan.j.demeyer@gmail.com --- tools/testing/selftests/bpf/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index be892ea76c32..612f699dc4f7 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -71,7 +71,7 @@ CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y -CONFIG_TEST_BPF=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y -- cgit v1.2.3-58-ga151
From 156ed20d22ee68d470232d26ae6df2cefacac4a0 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 6 Dec 2022 15:05:38 -0600 Subject: bpf: Don't use rcu_users to refcount in task kfuncs A series of prior patches added some kfuncs that allow struct task_struct * objects to be used as kptrs. These kfuncs leveraged the 'refcount_t rcu_users' field of the task for performing refcounting. This field was used instead of 'refcount_t usage', as we wanted to leverage the safety provided by RCU for ensuring a task's lifetime. A struct task_struct is refcounted by two different refcount_t fields: 1. p->usage: The "true" refcount field which tracks task lifetime. The task is freed as soon as this refcount drops to 0. 2. p->rcu_users: An "RCU users" refcount field which is statically initialized to 2, and is co-located in a union with a struct rcu_head field (p->rcu). p->rcu_users essentially encapsulates a single p->usage refcount, and when p->rcu_users goes to 0, an RCU callback is scheduled on the struct rcu_head which decrements the p->usage refcount. Our logic was that by using p->rcu_users, we would be able to use RCU to safely issue refcount_inc_not_zero() on a task's rcu_users field to determine whether a task could still be acquired, or was exiting. Unfortunately, this does not work due to p->rcu_users and p->rcu sharing a union. When p->rcu_users goes to 0, an RCU callback is scheduled to drop a single p->usage refcount, and because the fields share a union, the refcount immediately becomes nonzero again after the callback is scheduled. If we were to split the fields out of the union, this wouldn't be a problem. Doing so should also be rather non-controversial, as there are a number of places in struct task_struct that have padding which we could use to avoid growing the structure by splitting up the fields.
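For context, the relevant fields of struct task_struct in include/linux/sched.h look roughly like this (simplified sketch, unrelated fields omitted):

	struct task_struct {
		...
		refcount_t		usage;
		...
		union {
			refcount_t	rcu_users;
			struct rcu_head	rcu;
		};
		...
	};

Because rcu_users shares its storage with the rcu_head, queueing the RCU callback reuses those bytes, which is why the refcount reads as nonzero again immediately after the callback is scheduled.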
For now, so as to fix the kfuncs to be correct, this patch instead updates bpf_task_acquire() and bpf_task_release() to use the p->usage field for refcounting via the get_task_struct() and put_task_struct() functions. Because we can no longer rely on RCU, the change also guts the bpf_task_acquire_not_zero() and bpf_task_kptr_get() functions pending a resolution on the above problem. In addition, the patch fixes the kfunc and rcu_read_lock selftests to expect this new behavior. Fixes: 90660309b0c7 ("bpf: Add kfuncs for storing struct task_struct * as a kptr") Fixes: fca1aa75518c ("bpf: Handle MEM_RCU type properly") Reported-by: Matus Jokay Signed-off-by: David Vernet Link: https://lore.kernel.org/r/20221206210538.597606-1-void@manifault.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 76 ++++++++++++++-------- tools/testing/selftests/bpf/progs/rcu_read_lock.c | 5 ++ .../selftests/bpf/progs/task_kfunc_success.c | 9 ++- 3 files changed, 60 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index cca642358e80..284b3ffdbe48 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1833,8 +1833,7 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) */ struct task_struct *bpf_task_acquire(struct task_struct *p) { - refcount_inc(&p->rcu_users); - return p; + return get_task_struct(p); } /** @@ -1845,9 +1844,48 @@ struct task_struct *bpf_task_acquire(struct task_struct *p) */ struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) { - if (!refcount_inc_not_zero(&p->rcu_users)) - return NULL; - return p; + /* For the time being this function returns NULL, as it's not currently + * possible to safely acquire a reference to a task with RCU protection + * using get_task_struct() and put_task_struct(). This is due to the + * slightly odd mechanics of p->rcu_users, and how task RCU protection + * works. + * + * A struct task_struct is refcounted by two different refcount_t + * fields: + * + * 1. p->usage: The "true" refcount field which tracks a task's + * lifetime. The task is freed as soon as this + * refcount drops to 0. + * + * 2. p->rcu_users: An "RCU users" refcount field which is statically + * initialized to 2, and is co-located in a union with + * a struct rcu_head field (p->rcu). p->rcu_users + * essentially encapsulates a single p->usage + * refcount, and when p->rcu_users goes to 0, an RCU + * callback is scheduled on the struct rcu_head which + * decrements the p->usage refcount. + * + * There are two important implications to this task refcounting logic + * described above. The first is that + * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as + * after the refcount goes to 0, the RCU callback being scheduled will + * cause the memory backing the refcount to again be nonzero due to the + * fields sharing a union. The other is that we can't rely on RCU to + * guarantee that a task is valid in a BPF program. This is because a + * task could have already transitioned to being in the TASK_DEAD + * state, had its rcu_users refcount go to 0, and its rcu callback + * invoked, in which it drops its single p->usage reference. At this + * point the task will be freed as soon as the last p->usage reference + * goes to 0, without waiting for another RCU gp to elapse. The only + * way that a BPF program can guarantee that a task is valid in this + * scenario is to hold a p->usage refcount itself.
+ * + * Until we're able to resolve this issue, either by pulling + * p->rcu_users and p->rcu out of the union, or by getting rid of + * p->usage and just using p->rcu_users for refcounting, we'll just + * return NULL here. + */ + return NULL; } /** @@ -1858,33 +1896,15 @@ struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) */ struct task_struct *bpf_task_kptr_get(struct task_struct **pp) { - struct task_struct *p; - - rcu_read_lock(); - p = READ_ONCE(*pp); - - /* Another context could remove the task from the map and release it at - * any time, including after we've done the lookup above. This is safe - * because we're in an RCU read region, so the task is guaranteed to - * remain valid until at least the rcu_read_unlock() below. + /* We must return NULL here until we have clarity on how to properly + * leverage RCU for ensuring a task's lifetime. See the comment above + * in bpf_task_acquire_not_zero() for more details. */ - if (p && !refcount_inc_not_zero(&p->rcu_users)) - /* If the task had been removed from the map and freed as - * described above, refcount_inc_not_zero() will return false. - * The task will be freed at some point after the current RCU - * gp has ended, so just return NULL to the user. - */ - p = NULL; - rcu_read_unlock(); - - return p; + return NULL; } /** * bpf_task_release - Release the reference acquired on a struct task_struct *. - * If this kfunc is invoked in an RCU read region, the task_struct is - * guaranteed to not be freed until the current grace period has ended, even if - * its refcount drops to 0. * @p: The task on which a reference is being released. */ void bpf_task_release(struct task_struct *p) @@ -1892,7 +1912,7 @@ void bpf_task_release(struct task_struct *p) if (!p) return; - put_task_struct_rcu_user(p); + put_task_struct(p); } #ifdef CONFIG_CGROUPS diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index cf06a34fcb02..125f908024d3 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -161,6 +161,11 @@ int task_acquire(void *ctx) /* acquire a reference which can be used outside rcu read lock region */ gparent = bpf_task_acquire_not_zero(gparent); if (!gparent) + /* Until we resolve the issues with using task->rcu_users, we + * expect bpf_task_acquire_not_zero() to return a NULL task. + * See the comment at the definition of + * bpf_task_acquire_not_zero() for more details. + */ goto out; (void)bpf_task_storage_get(&map_a, gparent, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 60c7ead41cfc..9f359cfd29e7 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -123,12 +123,17 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) } kptr = bpf_task_kptr_get(&v->task); - if (!kptr) { + if (kptr) { + /* Until we resolve the issues with using task->rcu_users, we + * expect bpf_task_kptr_get() to return a NULL task. See the + * comment at the definition of bpf_task_acquire_not_zero() for + * more details. 
+ */ + bpf_task_release(kptr); err = 3; return 0; } - bpf_task_release(kptr); return 0; } -- cgit v1.2.3-58-ga151
From 537c3f66eac137a02ec50a40219d2da6597e5dc9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 7 Dec 2022 12:16:47 -0800 Subject: selftests/bpf: add generic BPF program tester-loader It's become a common pattern to have a collection of small BPF programs in one BPF object file, each representing one test case. On the user-space side of such tests we maintain a table of program names and expected failure or success, along with an optional expected verifier log message. This works, but each set of tests reimplements this mundane code over and over again, which is a waste of time for anyone trying to add a new set of tests. Furthermore, it's quite error prone, as it's way too easy to miss some entries in these manually maintained test tables (as evidenced by the dynptr_fail tests, in which the ringbuf_release_uninit_dynptr subtest was accidentally missed; this is fixed in the next patch). So this patch implements a generic test_loader, which accepts a skeleton name and handles the rest of the details: it opens and loads the BPF object file, making sure each program is tested in isolation. Optionally, each test case can specify an expected BPF verifier log message. In case of failure, the tester makes sure to report the verifier log; in verbose mode the verifier log is reported unconditionally. Now, the interesting deviation from existing custom implementations is the use of the btf_decl_tag attribute to specify expected-to-fail vs expected-to-succeed markers and, optionally, the expected log message directly next to the BPF program source code, eliminating the need to manually create and update a table of tests. We define a few macros wrapping btf_decl_tag with a convention that all values of btf_decl_tag start with a "comment:" prefix, and then utilize a very simple "just_some_text_tag" or "some_key_name=" pattern to define things like expected success/failure, expected verifier message, and extra verifier log level (if necessary). This approach is demonstrated by the next patch, in which two existing sets of failure tests are converted. The tester supports both expected-to-fail and expected-to-succeed programs, though this patch set didn't convert any existing expected-to-succeed programs yet, as existing tests couple BPF program loading with their further execution through attach or test_prog_run. One way to allow testing scenarios like this would be the ability to specify a custom callback, executed for each successfully loaded BPF program. This is left for follow up patches, after some more analysis of existing test cases. This test_loader is, hopefully, the start of a test_verifier-like runner integrated into the test_progs infrastructure. It will allow a much better "user experience" when defining low-level verification tests that can take advantage of all the libbpf-provided nicety features on the BPF side: global variables, declarative maps, etc. All while having a choice of defining it in C or as BPF assembly (through __attribute__((naked)) functions and using embedded asm), depending on what makes most sense in each particular case. This will be explored in follow up patches as well.
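To make the convention concrete, a failure test case written against these macros would be annotated directly at its definition, along these lines (hypothetical test case illustrating the macros added below; map and program names are made up):

	struct {
		__uint(type, BPF_MAP_TYPE_RINGBUF);
		__uint(max_entries, 4096);
	} ringbuf SEC(".maps");

	SEC("?raw_tp")
	__failure __msg("Unreleased reference")
	int ringbuf_missing_release(void *ctx)
	{
		void *sample = bpf_ringbuf_reserve(&ringbuf, 16, 0);

		/* never submitted or discarded, so the verifier must reject
		 * the program with the message above
		 */
		return sample ? 1 : 0;
	}

test_loader then discovers these btf_decl_tag annotations from the object's BTF, loads each such program in isolation, and matches the verifier log against the expected message.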
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221207201648.2990661-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/progs/bpf_misc.h | 5 + tools/testing/selftests/bpf/test_loader.c | 233 +++++++++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 33 ++++ 4 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_loader.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 997b3bd10cbf..c22c43bbee19 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -527,7 +527,7 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c + cap_helpers.c test_loader.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 5bb11fe595a4..4a01ea9113bf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -2,6 +2,11 @@ #ifndef __BPF_MISC_H__ #define __BPF_MISC_H__ +#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) +#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) +#define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) +#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) + #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__x64_" diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c new file mode 100644 index 000000000000..679efb3aa785 --- /dev/null +++ b/tools/testing/selftests/bpf/test_loader.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <stdlib.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#define str_has_pfx(str, pfx) \ + (strncmp(str, pfx, __builtin_constant_p(pfx) ?
sizeof(pfx) - 1 : strlen(pfx)) == 0) + +#define TEST_LOADER_LOG_BUF_SZ 1048576 + +#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" +#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" +#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" + +struct test_spec { + const char *name; + bool expect_failure; + const char *expect_msg; + int log_level; +}; + +static int tester_init(struct test_loader *tester) +{ + if (!tester->log_buf) { + tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ; + tester->log_buf = malloc(tester->log_buf_sz); + if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf")) + return -ENOMEM; + } + + return 0; +} + +void test_loader_fini(struct test_loader *tester) +{ + if (!tester) + return; + + free(tester->log_buf); +} + +static int parse_test_spec(struct test_loader *tester, + struct bpf_object *obj, + struct bpf_program *prog, + struct test_spec *spec) +{ + struct btf *btf; + int func_id, i; + + memset(spec, 0, sizeof(*spec)); + + spec->name = bpf_program__name(prog); + + btf = bpf_object__btf(obj); + if (!btf) { + ASSERT_FAIL("BPF object has no BTF"); + return -EINVAL; + } + + func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC); + if (func_id < 0) { + ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name); + return -EINVAL; + } + + for (i = 1; i < btf__type_cnt(btf); i++) { + const struct btf_type *t; + const char *s; + + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + + if (t->type != func_id || btf_decl_tag(t)->component_idx != -1) + continue; + + s = btf__str_by_offset(btf, t->name_off); + if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) { + spec->expect_failure = true; + } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { + spec->expect_failure = false; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { + spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { + errno = 0; + spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0); + if (errno) { + ASSERT_FAIL("failed to parse test log level from '%s'", s); + return -EINVAL; + } + } + } + + return 0; +} + +static void prepare_case(struct test_loader *tester, + struct test_spec *spec, + struct bpf_object *obj, + struct bpf_program *prog) +{ + int min_log_level = 0; + + if (env.verbosity > VERBOSE_NONE) + min_log_level = 1; + if (env.verbosity > VERBOSE_VERY) + min_log_level = 2; + + bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz); + + /* Make sure we set at least minimal log level, unless test requirest + * even higher level already. Make sure to preserve independent log + * level 4 (verifier stats), though. 
+ */ + if ((spec->log_level & 3) < min_log_level) + bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level); + else + bpf_program__set_log_level(prog, spec->log_level); + + tester->log_buf[0] = '\0'; +} + +static void emit_verifier_log(const char *log_buf, bool force) +{ + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf); +} + +static void validate_case(struct test_loader *tester, + struct test_spec *spec, + struct bpf_object *obj, + struct bpf_program *prog, + int load_err) +{ + if (spec->expect_msg) { + char *match; + + match = strstr(tester->log_buf, spec->expect_msg); + if (!ASSERT_OK_PTR(match, "expect_msg")) { + /* if we are in verbose mode, we've already emitted log */ + if (env.verbosity == VERBOSE_NONE) + emit_verifier_log(tester->log_buf, true /*force*/); + fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg); + return; + } + } +} + +/* this function is forced noinline and has short generic name to look better + * in test_progs output (in case of a failure) + */ +static noinline +void run_subtest(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) +{ + LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name); + struct bpf_object *obj = NULL, *tobj; + struct bpf_program *prog, *tprog; + const void *obj_bytes; + size_t obj_byte_cnt; + int err; + + if (tester_init(tester) < 0) + return; /* failed to initialize tester */ + + obj_bytes = elf_bytes_factory(&obj_byte_cnt); + obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); + if (!ASSERT_OK_PTR(obj, "obj_open_mem")) + return; + + bpf_object__for_each_program(prog, obj) { + const char *prog_name = bpf_program__name(prog); + struct test_spec spec; + + if (!test__start_subtest(prog_name)) + continue; + + /* if we can't derive test specification, go to the next test */ + err = parse_test_spec(tester, obj, prog, &spec); + if (!ASSERT_OK(err, "parse_test_spec")) + continue; + + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts); + if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ + continue; + + bpf_object__for_each_program(tprog, tobj) + bpf_program__set_autoload(tprog, false); + + bpf_object__for_each_program(tprog, tobj) { + /* only load specified program */ + if (strcmp(bpf_program__name(tprog), prog_name) == 0) { + bpf_program__set_autoload(tprog, true); + break; + } + } + + prepare_case(tester, &spec, tobj, tprog); + + err = bpf_object__load(tobj); + if (spec.expect_failure) { + if (!ASSERT_ERR(err, "unexpected_load_success")) { + emit_verifier_log(tester->log_buf, false /*force*/); + goto tobj_cleanup; + } + } else { + if (!ASSERT_OK(err, "unexpected_load_failure")) { + emit_verifier_log(tester->log_buf, true /*force*/); + goto tobj_cleanup; + } + } + + emit_verifier_log(tester->log_buf, false /*force*/); + validate_case(tester, &spec, tobj, tprog, err); + +tobj_cleanup: + bpf_object__close(tobj); + } + + bpf_object__close(obj); +} + +void test_loader__run_subtests(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory) +{ + /* see comment in run_subtest() for why we do this function nesting */ + run_subtest(tester, skel_name, elf_bytes_factory); +} diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index b090996daee5..3f058dfadbaf 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -1,4 +1,7 @@ /* 
SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TEST_PROGS_H +#define __TEST_PROGS_H + #include #include #include @@ -210,6 +213,12 @@ int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_FAIL(fmt, args...) ({ \ + static int duration = 0; \ + CHECK(false, "", fmt"\n", ##args); \ + false; \ +}) + #define ASSERT_TRUE(actual, name) ({ \ static int duration = 0; \ bool ___ok = (actual); \ @@ -397,3 +406,27 @@ int write_sysctl(const char *sysctl, const char *value); #endif #define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" + +struct test_loader { + char *log_buf; + size_t log_buf_sz; + + struct bpf_object *obj; +}; + +typedef const void *(*skel_elf_bytes_fn)(size_t *sz); + +extern void test_loader__run_subtests(struct test_loader *tester, + const char *skel_name, + skel_elf_bytes_fn elf_bytes_factory); + +extern void test_loader_fini(struct test_loader *tester); + +#define RUN_TESTS(skel) ({ \ + struct test_loader tester = {}; \ + \ + test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \ + test_loader_fini(&tester); \ +}) + +#endif /* __TEST_PROGS_H */ -- cgit v1.2.3-58-ga151 From 26c386ecf0212affb50f02dabcb0152995b99b07 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 7 Dec 2022 12:16:48 -0800 Subject: selftests/bpf: convert dynptr_fail and map_kptr_fail subtests to generic tester

Convert big chunks of the dynptr and map_kptr subtests to use the generic verification_tester. They are switched from manually maintained tables of test cases, specifying the program name and expected verifier error message, to btf_decl_tag-based annotations placed directly on the corresponding BPF programs: __failure to specify that a BPF program is expected to fail verification, and __msg() to specify the expected log message.
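For a sense of the mechanics, the conversion of one subtest boils down to the following; everything here is taken from the hunks below, using ringbuf_missing_release1 as the example:

  /* before: user-space table entry in prog_tests/dynptr.c */
  {"ringbuf_missing_release1", "Unreleased reference id=1"},

  /* after: annotation directly on the program in progs/dynptr_fail.c */
  SEC("?raw_tp")
  __failure __msg("Unreleased reference id=1")
  int ringbuf_missing_release1(void *ctx)
  {
          /* program body unchanged */
  }

  /* ...and the user-space runner shrinks to a single call */
  RUN_TESTS(dynptr_fail);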
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207201648.2990661-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/dynptr.c | 80 +--------------------- tools/testing/selftests/bpf/prog_tests/map_kptr.c | 80 +--------------------- tools/testing/selftests/bpf/progs/dynptr_fail.c | 31 +++++++++ tools/testing/selftests/bpf/progs/dynptr_success.c | 1 + tools/testing/selftests/bpf/progs/map_kptr_fail.c | 27 ++++++++ 5 files changed, 64 insertions(+), 155 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b0c06f821cb8..7faaf6d9e0d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -5,86 +5,16 @@ #include "dynptr_fail.skel.h" #include "dynptr_success.skel.h" -static size_t log_buf_sz = 1048576; /* 1 MB */ -static char obj_log_buf[1048576]; - static struct { const char *prog_name; const char *expected_err_msg; } dynptr_tests[] = { - /* failure cases */ - {"ringbuf_missing_release1", "Unreleased reference id=1"}, - {"ringbuf_missing_release2", "Unreleased reference id=2"}, - {"ringbuf_missing_release_callback", "Unreleased reference id"}, - {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, - {"ringbuf_invalid_api", "type=mem expected=ringbuf_mem"}, - {"add_dynptr_to_map1", "invalid indirect read from stack"}, - {"add_dynptr_to_map2", "invalid indirect read from stack"}, - {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, - {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, - {"data_slice_use_after_release1", "invalid mem access 'scalar'"}, - {"data_slice_use_after_release2", "invalid mem access 'scalar'"}, - {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, - {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, - {"invalid_helper1", "invalid indirect read from stack"}, - {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write1", "Expected an initialized dynptr as arg #1"}, - {"invalid_write2", "Expected an initialized dynptr as arg #3"}, - {"invalid_write3", "Expected an initialized dynptr as arg #1"}, - {"invalid_write4", "arg 1 is an unacquired reference"}, - {"invalid_read1", "invalid read from stack"}, - {"invalid_read2", "cannot pass in dynptr at an offset"}, - {"invalid_read3", "invalid read from stack"}, - {"invalid_read4", "invalid read from stack"}, - {"invalid_offset", "invalid write to stack"}, - {"global", "type=map_value expected=fp"}, - {"release_twice", "arg 1 is an unacquired reference"}, - {"release_twice_callback", "arg 1 is an unacquired reference"}, - {"dynptr_from_mem_invalid_api", - "Unsupported reg type fp for bpf_dynptr_from_mem data"}, - /* success cases */ {"test_read_write", NULL}, {"test_data_slice", NULL}, {"test_ringbuf", NULL}, }; -static void verify_fail(const char *prog_name, const char *expected_err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct bpf_program *prog; - struct dynptr_fail *skel; - int err; - - opts.kernel_log_buf = obj_log_buf; - opts.kernel_log_size = log_buf_sz; - opts.kernel_log_level = 1; - - skel = dynptr_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) - goto cleanup; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, 
"bpf_object__find_program_by_name")) - goto cleanup; - - bpf_program__set_autoload(prog, true); - - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - - err = dynptr_fail__load(skel); - if (!ASSERT_ERR(err, "unexpected load success")) - goto cleanup; - - if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { - fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); - fprintf(stderr, "Verifier output: %s\n", obj_log_buf); - } - -cleanup: - dynptr_fail__destroy(skel); -} - static void verify_success(const char *prog_name) { struct dynptr_success *skel; @@ -97,8 +27,6 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); - bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); - dynptr_success__load(skel); if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) goto cleanup; @@ -129,10 +57,8 @@ void test_dynptr(void) if (!test__start_subtest(dynptr_tests[i].prog_name)) continue; - if (dynptr_tests[i].expected_err_msg) - verify_fail(dynptr_tests[i].prog_name, - dynptr_tests[i].expected_err_msg); - else - verify_success(dynptr_tests[i].prog_name); + verify_success(dynptr_tests[i].prog_name); } + + RUN_TESTS(dynptr_fail); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 0d66b1524208..3533a4ecad01 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -5,83 +5,6 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" -static char log_buf[1024 * 1024]; - -struct { - const char *prog_name; - const char *err_msg; -} map_kptr_fail_tests[] = { - { "size_not_bpf_dw", "kptr access size must be BPF_DW" }, - { "non_const_var_off", "kptr access cannot have variable offset" }, - { "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. 
kptr has to be" }, - { "misaligned_access_write", "kptr access misaligned expected=8 off=7" }, - { "misaligned_access_read", "kptr access misaligned expected=8 off=1" }, - { "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" }, - { "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" }, - { "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, - { "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" }, - { "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" }, - { "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" }, - { "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" }, - { "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" }, - { "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" }, - { "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" }, - { "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" }, - { "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, - { "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" }, - { "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" }, - { "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" }, - { "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" }, - { "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" }, - { "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" }, - { "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" }, - { "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" }, -}; - -static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, - .kernel_log_size = sizeof(log_buf), - .kernel_log_level = 1); - struct map_kptr_fail *skel; - struct bpf_program *prog; - int ret; - - skel = map_kptr_fail__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts")) - return; - - prog = bpf_object__find_program_by_name(skel->obj, prog_name); - if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) - goto end; - - bpf_program__set_autoload(prog, true); - - ret = map_kptr_fail__load(skel); - if (!ASSERT_ERR(ret, "map_kptr__load must fail")) - goto end; - - if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { - fprintf(stderr, "Expected: %s\n", err_msg); - fprintf(stderr, "Verifier: %s\n", log_buf); - } - -end: - map_kptr_fail__destroy(skel); -} - -static void test_map_kptr_fail(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) { - if (!test__start_subtest(map_kptr_fail_tests[i].prog_name)) - continue; - test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name, - map_kptr_fail_tests[i].err_msg); - } -} - static void test_map_kptr_success(bool test_run) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -145,5 +68,6 @@ void test_map_kptr(void) */ test_map_kptr_success(true); } - test_map_kptr_fail(); + + RUN_TESTS(map_kptr_fail); } diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index b0f08ff024fb..78debc1b3820 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c 
@@ -43,6 +43,7 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); int err, val; @@ -66,6 +67,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr) * bpf_ringbuf_submit/discard_dynptr call */ SEC("?raw_tp") +__failure __msg("Unreleased reference id=1") int ringbuf_missing_release1(void *ctx) { struct bpf_dynptr ptr; @@ -78,6 +80,7 @@ int ringbuf_missing_release1(void *ctx) } SEC("?raw_tp") +__failure __msg("Unreleased reference id=2") int ringbuf_missing_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -113,6 +116,7 @@ static int missing_release_callback_fn(__u32 index, void *data) /* Any dynptr initialized within a callback must have bpf_dynptr_put called */ SEC("?raw_tp") +__failure __msg("Unreleased reference id") int ringbuf_missing_release_callback(void *ctx) { bpf_loop(10, missing_release_callback_fn, NULL, 0); @@ -121,6 +125,7 @@ int ringbuf_missing_release_callback(void *ctx) /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int ringbuf_release_uninit_dynptr(void *ctx) { struct bpf_dynptr ptr; @@ -133,6 +138,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -152,6 +158,7 @@ int use_after_invalid(void *ctx) /* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ SEC("?raw_tp") +__failure __msg("type=mem expected=ringbuf_mem") int ringbuf_invalid_api(void *ctx) { struct bpf_dynptr ptr; @@ -174,6 +181,7 @@ done: /* Can't add a dynptr to a map */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int add_dynptr_to_map1(void *ctx) { struct bpf_dynptr ptr; @@ -191,6 +199,7 @@ int add_dynptr_to_map1(void *ctx) /* Can't add a struct with an embedded dynptr to a map */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int add_dynptr_to_map2(void *ctx) { struct test_info x; @@ -208,6 +217,7 @@ int add_dynptr_to_map2(void *ctx) /* A data slice can't be accessed out of bounds */ SEC("?raw_tp") +__failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -228,6 +238,7 @@ done: } SEC("?raw_tp") +__failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) { __u32 key = 0, map_val; @@ -248,6 +259,7 @@ int data_slice_out_of_bounds_map_value(void *ctx) /* A data slice can't be used after it has been released */ SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int data_slice_use_after_release1(void *ctx) { struct bpf_dynptr ptr; @@ -279,6 +291,7 @@ done: * ptr2 is at fp - 16). 
*/ SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") int data_slice_use_after_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -310,6 +323,7 @@ done: /* A data slice must be first checked for NULL */ SEC("?raw_tp") +__failure __msg("invalid mem access 'mem_or_null'") int data_slice_missing_null_check1(void *ctx) { struct bpf_dynptr ptr; @@ -330,6 +344,7 @@ int data_slice_missing_null_check1(void *ctx) /* A data slice can't be dereferenced if it wasn't checked for null */ SEC("?raw_tp") +__failure __msg("invalid mem access 'mem_or_null'") int data_slice_missing_null_check2(void *ctx) { struct bpf_dynptr ptr; @@ -352,6 +367,7 @@ done: * dynptr argument */ SEC("?raw_tp") +__failure __msg("invalid indirect read from stack") int invalid_helper1(void *ctx) { struct bpf_dynptr ptr; @@ -366,6 +382,7 @@ int invalid_helper1(void *ctx) /* A dynptr can't be passed into a helper function at a non-zero offset */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int invalid_helper2(void *ctx) { struct bpf_dynptr ptr; @@ -381,6 +398,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -402,6 +420,7 @@ int invalid_write1(void *ctx) * offset */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") int invalid_write2(void *ctx) { struct bpf_dynptr ptr; @@ -425,6 +444,7 @@ int invalid_write2(void *ctx) * non-const offset */ SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") int invalid_write3(void *ctx) { struct bpf_dynptr ptr; @@ -456,6 +476,7 @@ static int invalid_write4_callback(__u32 index, void *data) * be invalidated as a dynptr */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int invalid_write4(void *ctx) { struct bpf_dynptr ptr; @@ -472,7 +493,9 @@ int invalid_write4(void *ctx) /* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ struct bpf_dynptr global_dynptr; + SEC("?raw_tp") +__failure __msg("type=map_value expected=fp") int global(void *ctx) { /* this should fail */ @@ -485,6 +508,7 @@ int global(void *ctx) /* A direct read should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read1(void *ctx) { struct bpf_dynptr ptr; @@ -501,6 +525,7 @@ int invalid_read1(void *ctx) /* A direct read at an offset should fail */ SEC("?raw_tp") +__failure __msg("cannot pass in dynptr at an offset") int invalid_read2(void *ctx) { struct bpf_dynptr ptr; @@ -516,6 +541,7 @@ int invalid_read2(void *ctx) /* A direct read at an offset into the lower stack slot should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read3(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -542,6 +568,7 @@ static int invalid_read4_callback(__u32 index, void *data) /* A direct read within a callback function should fail */ SEC("?raw_tp") +__failure __msg("invalid read from stack") int invalid_read4(void *ctx) { struct bpf_dynptr ptr; @@ -557,6 +584,7 @@ int invalid_read4(void *ctx) /* Initializing a dynptr on an offset should fail */ SEC("?raw_tp") +__failure __msg("invalid write to stack") int invalid_offset(void *ctx) { struct bpf_dynptr ptr; @@ -571,6 +599,7 @@ int invalid_offset(void *ctx) /* Can't release a dynptr twice */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int release_twice(void *ctx) { struct bpf_dynptr ptr; @@ -597,6 +626,7 @@ 
static int release_twice_callback_fn(__u32 index, void *data) * within a calback function, fails */ SEC("?raw_tp") +__failure __msg("arg 1 is an unacquired reference") int release_twice_callback(void *ctx) { struct bpf_dynptr ptr; @@ -612,6 +642,7 @@ int release_twice_callback(void *ctx) /* Reject unsupported local mem types for dynptr_from_mem API */ SEC("?raw_tp") +__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data") int dynptr_from_mem_invalid_api(void *ctx) { struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a3a6103c8569..35db7c6c1fc7 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -20,6 +20,7 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 4096); } ringbuf SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 05e209b1b12a..760e41e1a632 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -3,6 +3,7 @@ #include #include #include +#include "bpf_misc.h" struct map_value { char buf[8]; @@ -23,6 +24,7 @@ extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; SEC("?tc") +__failure __msg("kptr access size must be BPF_DW") int size_not_bpf_dw(struct __sk_buff *ctx) { struct map_value *v; @@ -37,6 +39,7 @@ int size_not_bpf_dw(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access cannot have variable offset") int non_const_var_off(struct __sk_buff *ctx) { struct map_value *v; @@ -55,6 +58,7 @@ int non_const_var_off(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 doesn't have constant offset. 
kptr has to be") int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) { struct map_value *v; @@ -73,6 +77,7 @@ int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access misaligned expected=8 off=7") int misaligned_access_write(struct __sk_buff *ctx) { struct map_value *v; @@ -88,6 +93,7 @@ int misaligned_access_write(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kptr access misaligned expected=8 off=1") int misaligned_access_read(struct __sk_buff *ctx) { struct map_value *v; @@ -101,6 +107,7 @@ int misaligned_access_read(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)") int reject_var_off_store(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -124,6 +131,7 @@ int reject_var_off_store(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc") int reject_bad_type_match(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -144,6 +152,7 @@ int reject_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") int marked_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -158,6 +167,7 @@ int marked_as_untrusted_or_null(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4") int correct_btf_id_check_size(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -175,6 +185,7 @@ int correct_btf_id_check_size(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_") int inherit_untrusted_on_walk(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *unref_ptr; @@ -194,6 +205,7 @@ int inherit_untrusted_on_walk(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("off=8 kptr isn't referenced kptr") int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) { struct map_value *v; @@ -208,6 +220,7 @@ int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 expected pointer to map value") int reject_kptr_get_no_map_val(struct __sk_buff *ctx) { bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0); @@ -215,6 +228,7 @@ int reject_kptr_get_no_map_val(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 expected pointer to map value") int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) { bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0); @@ -222,6 +236,7 @@ int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 no referenced kptr at map value offset=0") int reject_kptr_get_no_kptr(struct __sk_buff *ctx) { struct map_value *v; @@ -236,6 +251,7 @@ int reject_kptr_get_no_kptr(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("arg#0 no referenced kptr at map value offset=8") int reject_kptr_get_on_unref(struct __sk_buff *ctx) { struct map_value *v; @@ -250,6 +266,7 @@ int reject_kptr_get_on_unref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("kernel function bpf_kfunc_call_test_kptr_get args#0") int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) { struct map_value *v; @@ -264,6 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -278,6 +296,7 @@ int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) } SEC("?tc") +__failure 
__msg("store to referenced kptr disallowed") int reject_untrusted_store_to_ref(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -297,6 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("R2 type=untrusted_ptr_ expected=ptr_") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -315,6 +335,8 @@ int reject_untrusted_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure +__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member") int reject_bad_type_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *ref_ptr; @@ -333,6 +355,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc") int reject_member_of_ref_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *ref_ptr; @@ -351,6 +374,7 @@ int reject_member_of_ref_xchg(struct __sk_buff *ctx) } SEC("?syscall") +__failure __msg("kptr cannot be accessed indirectly by helper") int reject_indirect_helper_access(struct __sk_buff *ctx) { struct map_value *v; @@ -371,6 +395,7 @@ int write_func(int *p) } SEC("?tc") +__failure __msg("kptr cannot be accessed indirectly by helper") int reject_indirect_global_func_access(struct __sk_buff *ctx) { struct map_value *v; @@ -384,6 +409,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("Unreleased reference id=5 alloc_insn=") int kptr_xchg_ref_state(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; @@ -402,6 +428,7 @@ int kptr_xchg_ref_state(struct __sk_buff *ctx) } SEC("?tc") +__failure __msg("Unreleased reference id=3 alloc_insn=") int kptr_get_ref_state(struct __sk_buff *ctx) { struct map_value *v; -- cgit v1.2.3-58-ga151 From e60db051a4a70bff151eb59774c64af3a0266794 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 6 Dec 2022 15:27:39 -0800 Subject: selftests/bpf: Bring test_offload.py back to life

Bpftool has a new extra libbpf_det_bind probing map that we need to exclude. Also skip trying to load the netdevsim module if it's already loaded (built in).
v2: - drop iproute2->bpftool changes (Toke) Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221206232739.2504890-1-sdf@google.com --- tools/testing/selftests/bpf/test_offload.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 7fc15e0d24a9..7cb1bc05e5cf 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -769,12 +769,14 @@ skip(ret != 0, "bpftool not installed") base_progs = progs _, base_maps = bpftool("map") base_map_names = [ - 'pid_iter.rodata' # created on each bpftool invocation + 'pid_iter.rodata', # created on each bpftool invocation + 'libbpf_det_bind', # created on each bpftool invocation ] # Check netdevsim -ret, out = cmd("modprobe netdevsim", fail=False) -skip(ret != 0, "netdevsim module could not be loaded") +if not os.path.isdir("/sys/bus/netdevsim/"): + ret, out = cmd("modprobe netdevsim", fail=False) + skip(ret != 0, "netdevsim module could not be loaded") # Check debugfs _, out = cmd("mount") -- cgit v1.2.3-58-ga151 From 6b75bd3d036745b9be30917909f03602099adbdb Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:35 +0530 Subject: bpf: Refactor ARG_PTR_TO_DYNPTR checks into process_dynptr_func

ARG_PTR_TO_DYNPTR is akin to ARG_PTR_TO_TIMER and ARG_PTR_TO_KPTR, where the underlying register type is subjected to more special checks to determine the type of object represented by the pointer and its state consistency. Move the dynptr checks to their own 'process_dynptr_func' function so that this is consistent and in line with existing code. This also makes it easier to reuse this code for kfunc handling. Then, reuse this consolidated function in kfunc dynptr handling too. Note that for kfuncs, the arg_type constraint of DYNPTR_TYPE_LOCAL has been lifted.
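In outline, both argument paths now funnel into the same check; the following is condensed from the verifier.c hunks below:

  /* helper arguments, in check_func_arg(): */
  case ARG_PTR_TO_DYNPTR:
          if (process_dynptr_func(env, regno, arg_type, meta))
                  return -EACCES;
          break;

  /* kfunc arguments, in check_kfunc_args(), after the PTR_TO_STACK /
   * PTR_TO_DYNPTR register type check; note that the DYNPTR_TYPE_LOCAL
   * constraint is gone:
   */
  case KF_ARG_PTR_TO_DYNPTR:
          ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR, NULL);
          if (ret < 0)
                  return ret;
          break;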
Acked-by: David Vernet Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207204141.308952-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 8 +- kernel/bpf/verifier.c | 134 +++++++++++---------- .../selftests/bpf/prog_tests/kfunc_dynptr_param.c | 7 +- .../selftests/bpf/progs/test_kfunc_dynptr_param.c | 12 -- 4 files changed, 75 insertions(+), 86 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 70d06a99f0b8..df0cb825e0e3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -615,11 +615,9 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, enum bpf_arg_type arg_type); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, - struct bpf_reg_state *reg); -bool is_dynptr_type_expected(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - enum bpf_arg_type arg_type); +struct bpf_call_arg_meta; +int process_dynptr_func(struct bpf_verifier_env *env, int regno, + enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3194e9d9e4e4..fcd8a71035aa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -810,8 +810,7 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_ return true; } -bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, - struct bpf_reg_state *reg) +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); int spi = get_spi(reg->off); @@ -830,9 +829,8 @@ bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, return true; } -bool is_dynptr_type_expected(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - enum bpf_arg_type arg_type) +static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) { struct bpf_func_state *state = func(env, reg); enum bpf_dynptr_type dynptr_type; @@ -5859,6 +5857,65 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return 0; } +int process_dynptr_func(struct bpf_verifier_env *env, int regno, + enum bpf_arg_type arg_type, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + + /* We only need to check for initialized / uninitialized helper + * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the + * assumption is that if it is, that a helper function + * initialized the dynptr on behalf of the BPF program. + */ + if (base_type(reg->type) == PTR_TO_DYNPTR) + return 0; + if (arg_type & MEM_UNINIT) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { + verbose(env, "Dynptr has to be an uninitialized dynptr\n"); + return -EINVAL; + } + + /* We only support one dynptr being uninitialized at the moment, + * which is sufficient for the helper functions we have right now. 
+ */ + if (meta->uninit_dynptr_regno) { + verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); + return -EFAULT; + } + + meta->uninit_dynptr_regno = regno; + } else { + if (!is_dynptr_reg_valid_init(env, reg)) { + verbose(env, + "Expected an initialized dynptr as arg #%d\n", + regno); + return -EINVAL; + } + + if (!is_dynptr_type_expected(env, reg, arg_type)) { + const char *err_extra = ""; + + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + err_extra = "local"; + break; + case DYNPTR_TYPE_RINGBUF: + err_extra = "ringbuf"; + break; + default: + err_extra = ""; + break; + } + verbose(env, + "Expected a dynptr of type %s as arg #%d\n", + err_extra, regno); + return -EINVAL; + } + } + return 0; +} + static bool arg_type_is_mem_size(enum bpf_arg_type type) { return type == ARG_CONST_SIZE || @@ -6390,52 +6447,8 @@ skip_type_check: err = check_mem_size_reg(env, reg, regno, true, meta); break; case ARG_PTR_TO_DYNPTR: - /* We only need to check for initialized / uninitialized helper - * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the - * assumption is that if it is, that a helper function - * initialized the dynptr on behalf of the BPF program. - */ - if (base_type(reg->type) == PTR_TO_DYNPTR) - break; - if (arg_type & MEM_UNINIT) { - if (!is_dynptr_reg_valid_uninit(env, reg)) { - verbose(env, "Dynptr has to be an uninitialized dynptr\n"); - return -EINVAL; - } - - /* We only support one dynptr being uninitialized at the moment, - * which is sufficient for the helper functions we have right now. - */ - if (meta->uninit_dynptr_regno) { - verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); - return -EFAULT; - } - - meta->uninit_dynptr_regno = regno; - } else if (!is_dynptr_reg_valid_init(env, reg)) { - verbose(env, - "Expected an initialized dynptr as arg #%d\n", - arg + 1); - return -EINVAL; - } else if (!is_dynptr_type_expected(env, reg, arg_type)) { - const char *err_extra = ""; - - switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { - case DYNPTR_TYPE_LOCAL: - err_extra = "local"; - break; - case DYNPTR_TYPE_RINGBUF: - err_extra = "ringbuf"; - break; - default: - err_extra = ""; - break; - } - verbose(env, - "Expected a dynptr of type %s as arg #%d\n", - err_extra, arg + 1); - return -EINVAL; - } + if (process_dynptr_func(env, regno, arg_type, meta)) + return -EACCES; break; case ARG_CONST_ALLOC_SIZE_OR_ZERO: if (!tnum_is_const(reg->var_off)) { @@ -8829,22 +8842,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return ret; break; case KF_ARG_PTR_TO_DYNPTR: - if (reg->type != PTR_TO_STACK) { - verbose(env, "arg#%d expected pointer to stack\n", i); - return -EINVAL; - } - - if (!is_dynptr_reg_valid_init(env, reg)) { - verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n", - i, btf_type_str(ref_t), ref_tname); + if (reg->type != PTR_TO_STACK && + reg->type != PTR_TO_DYNPTR) { + verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i); return -EINVAL; } - if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { - verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", - i, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } + ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR, NULL); + if (ret < 0) + return ret; break; case KF_ARG_PTR_TO_LIST_HEAD: if (reg->type != PTR_TO_MAP_VALUE && diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c 
b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index 55d641c1f126..a9229260a6ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -18,11 +18,8 @@ static struct { const char *expected_verifier_err_msg; int expected_runtime_err; } kfunc_dynptr_tests[] = { - {"dynptr_type_not_supp", - "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0}, - {"not_valid_dynptr", - "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0}, - {"not_ptr_to_stack", "arg#0 expected pointer to stack", 0}, + {"not_valid_dynptr", "Expected an initialized dynptr as arg #1", 0}, + {"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0}, {"dynptr_data_null", NULL, -EBADMSG}, }; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index ce39d096bba3..f4a8250329b2 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -32,18 +32,6 @@ int err, pid; char _license[] SEC("license") = "GPL"; -SEC("?lsm.s/bpf") -int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr, - unsigned int size) -{ - char write_data[64] = "hello there, world!!"; - struct bpf_dynptr ptr; - - bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); - - return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL); -} - SEC("?lsm.s/bpf") int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) { -- cgit v1.2.3-58-ga151 From 270605317366e4535d8d9fc3d9da1ad0fb3c9d45 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:37 +0530 Subject: bpf: Rework process_dynptr_func

Recently, user ringbuf support introduced a PTR_TO_DYNPTR register type for use in callback state, because in the case of user ringbuf helpers there is no dynptr on the stack that is passed into the callback. To reflect such a state, a special register type was created.

However, some checks have been bypassed incorrectly during the addition of this feature. First, args with the MEM_UNINIT flag, which initialize a dynptr, must be rejected for such a register type. Secondly, in the future there are plans to add dynptr helpers that operate on the dynptr itself and may change its offset and other properties. In all of these cases, PTR_TO_DYNPTR shouldn't be allowed to be passed to such helpers; however, the current code simply returns 0. The rejection for helpers that release the dynptr is already handled.

To fix this, we take a step back and rework the existing code in a way that will allow fitting in all classes of helpers and have a coherent model for dealing with the variety of use cases in which dynptr is used.

First, ARG_PTR_TO_DYNPTR can either be set alone or together with a DYNPTR_TYPE_* constant that denotes the only type it accepts. Next, helpers which initialize a dynptr use MEM_UNINIT to indicate this fact. To make the distinction clear, use the MEM_RDONLY flag to indicate that the helper only operates on the memory pointed to by the dynptr, not the dynptr itself. In C parlance, it would be equivalent to taking the dynptr as a pointer-to-const argument. When neither of these flags is present, the helper is allowed to mutate both the dynptr itself and also the memory it points to.
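Concretely, the three cases map onto helper argument types as sketched here; the first two are condensed from the helpers.c hunk further below, while the 'neither flag' case is described only in a comment since no such prototype appears in these hunks:

  /* MEM_UNINIT: the helper constructs a dynptr in uninitialized stack
   * memory (bpf_dynptr_from_mem):
   */
  .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,

  /* MEM_RDONLY: the helper takes 'const struct bpf_dynptr *' and never
   * mutates the dynptr itself, though the memory it points to may still
   * change (bpf_dynptr_read/bpf_dynptr_write/bpf_dynptr_data):
   */
  .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,

  /* neither flag: the helper may mutate or even destroy the dynptr
   * itself, so a CONST_PTR_TO_DYNPTR register has to be rejected for
   * such an argument.
   */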
Currently, the read-only status of the memory is not tracked in the dynptr, but it would be trivial to add this support inside the dynptr state of the register. With these changes and the renaming of PTR_TO_DYNPTR to CONST_PTR_TO_DYNPTR to better reflect its usage, it can no longer be passed to helpers that initialize a dynptr, i.e. bpf_dynptr_from_mem and bpf_ringbuf_reserve_dynptr. A note to reviewers: in the code that does mark_stack_slots_dynptr and unmark_stack_slots_dynptr, we implicitly rely on the fact that a PTR_TO_STACK reg is the only case that can reach that code path, as one cannot pass CONST_PTR_TO_DYNPTR to helpers that don't set MEM_RDONLY, and in both cases such helpers won't be setting that flag. The next patch will add a couple of selftest cases to make sure this doesn't break.

Fixes: 205715673844 ("bpf: Add bpf_user_ringbuf_drain() helper") Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207204141.308952-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 +- include/uapi/linux/bpf.h | 8 +- kernel/bpf/helpers.c | 18 +- kernel/bpf/verifier.c | 227 +++++++++++++++------ scripts/bpf_doc.py | 1 + tools/include/uapi/linux/bpf.h | 8 +- .../selftests/bpf/prog_tests/user_ringbuf.c | 4 +- 7 files changed, 191 insertions(+), 79 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4920ac252754..3de24cfb7a3d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -775,7 +775,7 @@ enum bpf_reg_type { PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ - PTR_TO_DYNPTR, /* reg points to a dynptr */ + CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. */ @@ -2828,7 +2828,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr); +u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f89de51a45db..464ca3f01fe7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5293,7 +5293,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5303,7 +5303,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5313,7 +5313,7 @@ union bpf_attr { * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr or if *flags* is not 0.
* - * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description * Get a pointer to the underlying dynptr data. * @@ -5414,7 +5414,7 @@ union bpf_attr { * Drain samples from the specified user ring buffer, and invoke * the provided callback for each such sample: * - * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); * * If **callback_fn** returns 0, the helper will continue to try * and drain the next sample, up to a maximum of diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6ed5875b97a3..fa1093d4ad6b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1404,7 +1404,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { #define DYNPTR_SIZE_MASK 0xFFFFFF #define DYNPTR_RDONLY_BIT BIT(31) -static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_RDONLY_BIT; } @@ -1414,7 +1414,7 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ ptr->size |= type << DYNPTR_TYPE_SHIFT; } -u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) +u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; } @@ -1438,7 +1438,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) memset(ptr, 0, sizeof(*ptr)); } -static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) { u32 size = bpf_dynptr_get_size(ptr); @@ -1483,7 +1483,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, }; -BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, +BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, u32, offset, u64, flags) { int err; @@ -1506,12 +1506,12 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_DYNPTR, + .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; -BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, +BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len, u64, flags) { int err; @@ -1532,14 +1532,14 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = { .func = bpf_dynptr_write, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_DYNPTR, + .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) +BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) { int err; @@ -1560,7 +1560,7 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = { .func = bpf_dynptr_data, .gpl_only = false, .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, - .arg1_type = ARG_PTR_TO_DYNPTR, + .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c 
index eb742ac75844..a880776bd999 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -592,7 +592,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env, [PTR_TO_BUF] = "buf", [PTR_TO_FUNC] = "func", [PTR_TO_MAP_KEY] = "map_key", - [PTR_TO_DYNPTR] = "dynptr_ptr", + [CONST_PTR_TO_DYNPTR] = "dynptr_ptr", }; if (type & PTR_MAYBE_NULL) { @@ -725,6 +725,28 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type) return type == BPF_DYNPTR_TYPE_RINGBUF; } +static void __mark_dynptr_reg(struct bpf_reg_state *reg, + enum bpf_dynptr_type type, + bool first_slot); + +static void __mark_reg_not_init(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg); + +static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1, + struct bpf_reg_state *sreg2, + enum bpf_dynptr_type type) +{ + __mark_dynptr_reg(sreg1, type, true); + __mark_dynptr_reg(sreg2, type, false); +} + +static void mark_dynptr_cb_reg(struct bpf_reg_state *reg, + enum bpf_dynptr_type type) +{ + __mark_dynptr_reg(reg, type, true); +} + + static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, enum bpf_arg_type arg_type, int insn_idx) { @@ -746,9 +768,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (type == BPF_DYNPTR_TYPE_INVALID) return -EINVAL; - state->stack[spi].spilled_ptr.dynptr.first_slot = true; - state->stack[spi].spilled_ptr.dynptr.type = type; - state->stack[spi - 1].spilled_ptr.dynptr.type = type; + mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr, + &state->stack[spi - 1].spilled_ptr, type); if (dynptr_type_refcounted(type)) { /* The id is used to track proper releasing */ @@ -756,8 +777,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (id < 0) return id; - state->stack[spi].spilled_ptr.id = id; - state->stack[spi - 1].spilled_ptr.id = id; + state->stack[spi].spilled_ptr.ref_obj_id = id; + state->stack[spi - 1].spilled_ptr.ref_obj_id = id; } return 0; @@ -779,25 +800,23 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re } /* Invalidate any slices associated with this dynptr */ - if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { - release_reference(env, state->stack[spi].spilled_ptr.id); - state->stack[spi].spilled_ptr.id = 0; - state->stack[spi - 1].spilled_ptr.id = 0; - } - - state->stack[spi].spilled_ptr.dynptr.first_slot = false; - state->stack[spi].spilled_ptr.dynptr.type = 0; - state->stack[spi - 1].spilled_ptr.dynptr.type = 0; + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) + WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id)); + __mark_reg_not_init(env, &state->stack[spi].spilled_ptr); + __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); return 0; } static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); - int spi = get_spi(reg->off); - int i; + int spi, i; + + if (reg->type == CONST_PTR_TO_DYNPTR) + return false; + spi = get_spi(reg->off); if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) return true; @@ -813,9 +832,14 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_ static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct bpf_func_state *state = func(env, reg); - int spi = get_spi(reg->off); + int spi; int i; + /* This already represents first slot of initialized 
bpf_dynptr */ + if (reg->type == CONST_PTR_TO_DYNPTR) + return true; + + spi = get_spi(reg->off); if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || !state->stack[spi].spilled_ptr.dynptr.first_slot) return false; @@ -834,15 +858,19 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg { struct bpf_func_state *state = func(env, reg); enum bpf_dynptr_type dynptr_type; - int spi = get_spi(reg->off); + int spi; /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ if (arg_type == ARG_PTR_TO_DYNPTR) return true; dynptr_type = arg_to_dynptr_type(arg_type); - - return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type; + if (reg->type == CONST_PTR_TO_DYNPTR) { + return reg->dynptr.type == dynptr_type; + } else { + spi = get_spi(reg->off); + return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type; + } } /* The reg state of a pointer or a bounded scalar was saved when @@ -1354,9 +1382,6 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void __mark_reg_not_init(const struct bpf_verifier_env *env, - struct bpf_reg_state *reg); - /* This helper doesn't clear reg->id */ static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) { @@ -1419,6 +1444,19 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, __mark_reg_known_zero(regs + regno); } +static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, + bool first_slot) +{ + /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for + * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply + * set it unconditionally as it is ignored for STACK_DYNPTR anyway. + */ + __mark_reg_known_zero(reg); + reg->type = CONST_PTR_TO_DYNPTR; + reg->dynptr.type = type; + reg->dynptr.first_slot = first_slot; +} + static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) { if (base_type(reg->type) == PTR_TO_MAP_VALUE) { @@ -5857,19 +5895,58 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return 0; } +/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK + * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR. + * + * In both cases we deal with the first 8 bytes, but need to mark the next 8 + * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of + * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object. + * + * Mutability of bpf_dynptr is at two levels, one is at the level of struct + * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct + * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can + * mutate the view of the dynptr and also possibly destroy it. In the latter + * case, it cannot mutate the bpf_dynptr itself but it can still mutate the + * memory that dynptr points to. + * + * The verifier will keep track both levels of mutation (bpf_dynptr's in + * reg->type and the memory's in reg->dynptr.type), but there is no support for + * readonly dynptr view yet, hence only the first case is tracked and checked. + * + * This is consistent with how C applies the const modifier to a struct object, + * where the pointer itself inside bpf_dynptr becomes const but not what it + * points to. + * + * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument + * type, and declare it as 'const struct bpf_dynptr *' in their prototype. 
+ */
 int process_dynptr_func(struct bpf_verifier_env *env, int regno,
-			enum bpf_arg_type arg_type,
-			struct bpf_call_arg_meta *meta)
+			enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
 {
 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 
-	/* We only need to check for initialized / uninitialized helper
-	 * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the
-	 * assumption is that if it is, that a helper function
-	 * initialized the dynptr on behalf of the BPF program.
+	/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
+	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
+	 */
+	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
+		verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
+		return -EFAULT;
+	}
+	/* MEM_UNINIT - Points to memory that is an appropriate candidate for
+	 *		constructing a mutable bpf_dynptr object.
+	 *
+	 *		Currently, this is only possible with PTR_TO_STACK
+	 *		pointing to a region of at least 16 bytes which doesn't
+	 *		contain an existing bpf_dynptr.
+	 *
+	 * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
+	 *		mutated or destroyed. However, the memory it points to
+	 *		may be mutated.
+	 *
+	 * None - Points to an initialized dynptr that can be mutated and
+	 *	  destroyed, including mutation of the memory it points
+	 *	  to.
 	 */
-	if (base_type(reg->type) == PTR_TO_DYNPTR)
-		return 0;
 	if (arg_type & MEM_UNINIT) {
 		if (!is_dynptr_reg_valid_uninit(env, reg)) {
 			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
@@ -5885,7 +5962,13 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
 		}
 
 		meta->uninit_dynptr_regno = regno;
-	} else {
+	} else /* MEM_RDONLY and None case from above */ {
+		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
+		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
+			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
+			return -EINVAL;
+		}
+
 		if (!is_dynptr_reg_valid_init(env, reg)) {
 			verbose(env,
 				"Expected an initialized dynptr as arg #%d\n",
@@ -5893,7 +5976,8 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
 			return -EINVAL;
 		}
 
-		if (!is_dynptr_type_expected(env, reg, arg_type)) {
+		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
+		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
 			const char *err_extra = "";
 
 			switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
@@ -6056,7 +6140,7 @@ static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }
 static const struct bpf_reg_types dynptr_types = {
 	.types = {
 		PTR_TO_STACK,
-		PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL,
+		CONST_PTR_TO_DYNPTR,
 	}
 };
 
@@ -6241,12 +6325,16 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
 }
 
-static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
-	int spi = get_spi(reg->off);
+	int spi;
+
+	if (reg->type == CONST_PTR_TO_DYNPTR)
+		return reg->ref_obj_id;
 
-	return state->stack[spi].spilled_ptr.id;
+	spi = get_spi(reg->off);
+	return state->stack[spi].spilled_ptr.ref_obj_id;
 }
 
 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
@@ -6311,11 +6399,22 @@ skip_type_check:
 	if (arg_type_is_release(arg_type)) {
 		if (arg_type_is_dynptr(arg_type)) {
 			struct bpf_func_state *state = func(env, reg);
-			int spi = get_spi(reg->off);
+			int spi;
 
-			if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
-			    !state->stack[spi].spilled_ptr.id) {
-				verbose(env, "arg %d is an unacquired reference\n", regno);
+			/* Only a dynptr created on the stack can be released, thus
+			 * the get_spi and stack state checks for spilled_ptr
+			 * should only be done before process_dynptr_func for
+			 * PTR_TO_STACK.
+			 */
+			if (reg->type == PTR_TO_STACK) {
+				spi = get_spi(reg->off);
+				if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+				    !state->stack[spi].spilled_ptr.ref_obj_id) {
+					verbose(env, "arg %d is an unacquired reference\n", regno);
+					return -EINVAL;
+				}
+			} else {
+				verbose(env, "cannot release unowned const bpf_dynptr\n");
 				return -EINVAL;
 			}
 		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
@@ -7289,11 +7388,10 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
 {
 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
 	 * callback_ctx, u64 flags);
-	 * callback_fn(struct bpf_dynptr_t* dynptr, void *callback_ctx);
+	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
 	 */
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
-	callee->regs[BPF_REG_1].type = PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL;
-	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+	mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 
 	/* unused */
@@ -7687,7 +7785,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	regs = cur_regs(env);
 
+	/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
+	 * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
+	 * is safe to do directly.
+	 */
 	if (meta.uninit_dynptr_regno) {
+		if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
+			verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
+			return -EFAULT;
+		}
 		/* we write BPF_DW bits (8 bytes) at a time */
 		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
 			err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
@@ -7705,15 +7811,24 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	if (meta.release_regno) {
 		err = -EINVAL;
-		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
+		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
+		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
+		 * is safe to do directly.
+		 */
+		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
+			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
+				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
+				return -EFAULT;
+			}
 			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
-		else if (meta.ref_obj_id)
+		} else if (meta.ref_obj_id) {
 			err = release_reference(env, meta.ref_obj_id);
-		/* meta.ref_obj_id can only be 0 if register that is meant to be
-		 * released is NULL, which must be > R0.
-		 */
-		else if (register_is_null(&regs[meta.release_regno]))
+		} else if (register_is_null(&regs[meta.release_regno])) {
+			/* meta.ref_obj_id can only be 0 if register that is meant to be
+			 * released is NULL, which must be > R0.
+ */ err = 0; + } if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id); @@ -7787,11 +7902,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } - if (base_type(reg->type) != PTR_TO_DYNPTR) - /* Find the id of the dynptr we're - * tracking the reference of - */ - meta.ref_obj_id = stack_slot_get_id(env, reg); + meta.ref_obj_id = dynptr_ref_obj_id(env, reg); break; } } @@ -8848,12 +8959,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_DYNPTR: if (reg->type != PTR_TO_STACK && - reg->type != PTR_TO_DYNPTR) { + reg->type != CONST_PTR_TO_DYNPTR) { verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i); return -EINVAL; } - ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR, NULL); + ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL); if (ret < 0) return ret; break; diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index fdb0aff8cb5a..e8d90829f23e 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -752,6 +752,7 @@ class PrinterHelpers(Printer): 'struct bpf_timer', 'struct mptcp_sock', 'struct bpf_dynptr', + 'const struct bpf_dynptr', 'struct iphdr', 'struct ipv6hdr', } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f89de51a45db..464ca3f01fe7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5293,7 +5293,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5303,7 +5303,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5313,7 +5313,7 @@ union bpf_attr { * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr or if *flags* is not 0. * - * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description * Get a pointer to the underlying dynptr data. 
* @@ -5414,7 +5414,7 @@ union bpf_attr { * Drain samples from the specified user ring buffer, and invoke * the provided callback for each such sample: * - * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); * * If **callback_fn** returns 0, the helper will continue to try * and drain the next sample, up to a maximum of diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index 02b18d018b36..aefa0a474e58 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -673,8 +673,8 @@ static struct { {"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"}, {"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"}, {"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"}, - {"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"}, - {"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"}, + {"user_ringbuf_callback_discard_dynptr", "cannot release unowned const bpf_dynptr"}, + {"user_ringbuf_callback_submit_dynptr", "cannot release unowned const bpf_dynptr"}, {"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"}, }; -- cgit v1.2.3-58-ga151 From 184c9bdb8f65d9f909b3a089a83bc0b0f1e1ea4c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:38 +0530 Subject: bpf: Rework check_func_arg_reg_off While check_func_arg_reg_off is the place which performs generic checks needed by various candidates of reg->type, there is some handling for special cases, like ARG_PTR_TO_DYNPTR, OBJ_RELEASE, and ARG_PTR_TO_RINGBUF_MEM. This commit aims to streamline these special cases and instead leave other things up to argument type specific code to handle. The function will be restrictive by default, and cover all possible cases when OBJ_RELEASE is set, without having to update the function again (and missing to do that being a bug). This is done primarily for two reasons: associating back reg->type to its argument leaves room for the list getting out of sync when a new reg->type is supported by an arg_type. The other case is ARG_PTR_TO_RINGBUF_MEM. The problem there is something we already handle, whenever a release argument is expected, it should be passed as the pointer that was received from the acquire function. Hence zero fixed and variable offset. There is nothing special about ARG_PTR_TO_RINGBUF_MEM, where technically its target register type PTR_TO_MEM | MEM_RINGBUF can already be passed with non-zero offset to other helper functions, which makes sense. Hence, lift the arg_type_is_release check for reg->off and cover all possible register types, instead of duplicating the same kind of check twice for current OBJ_RELEASE arg_types (alloc_mem and ptr_to_btf_id). For the release argument, arg_type_is_dynptr is the special case, where we go to actual object being freed through the dynptr, so the offset of the pointer still needs to allow fixed and variable offset and process_dynptr_func will verify them later for the release argument case as well. This is not specific to ARG_PTR_TO_DYNPTR though, we will need to make this exception for any future object on the stack that needs to be released. 
In this sense, PTR_TO_STACK as a candidate for an object-on-stack argument is
a special case for release offset checks, and they need to be done by the
helper releasing the object on the stack.

Since the check has been lifted above all register type checks, remove the
duplicated check that is being done for PTR_TO_BTF_ID.

Acked-by: Joanne Koong
Acked-by: David Vernet
Signed-off-by: Kumar Kartikeya Dwivedi
Link: https://lore.kernel.org/r/20221207204141.308952-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c                          | 63 ++++++++++++++++----------
 tools/testing/selftests/bpf/verifier/calls.c   |  2 +-
 tools/testing/selftests/bpf/verifier/ringbuf.c |  2 +-
 3 files changed, 40 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a880776bd999..8a7e964bcc31 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6269,11 +6269,37 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 			   const struct bpf_reg_state *reg, int regno,
 			   enum bpf_arg_type arg_type)
 {
-	enum bpf_reg_type type = reg->type;
-	bool fixed_off_ok = false;
+	u32 type = reg->type;
+
+	/* When referenced register is passed to release function, its fixed
+	 * offset must be 0.
+	 *
+	 * We will check that an arg_type_is_release reg has ref_obj_id when
+	 * storing meta->release_regno.
+	 */
+	if (arg_type_is_release(arg_type)) {
+		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
+		 * may not directly point to the object being released, but to
+		 * a dynptr pointing to such an object, which might be at some
+		 * offset on the stack. In that case, we simply fall back to
+		 * the default handling.
+		 */
+		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
+			return 0;
+		/* Doing check_ptr_off_reg check for the offset will catch this
+		 * because fixed_off_ok is false, but checking here allows us
+		 * to give the user a better error message.
+		 */
+		if (reg->off) {
+			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
+				regno);
+			return -EINVAL;
+		}
+		return __check_ptr_off_reg(env, reg, regno, false);
+	}
 
-	switch ((u32)type) {
-	/* Pointer types where reg offset is explicitly allowed: */
+	switch (type) {
+	/* Pointer types where both fixed and variable offset are explicitly allowed: */
 	case PTR_TO_STACK:
 		if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
 			verbose(env, "cannot pass in dynptr at an offset\n");
@@ -6290,12 +6316,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	case PTR_TO_BUF:
 	case PTR_TO_BUF | MEM_RDONLY:
 	case SCALAR_VALUE:
-		/* Some of the argument types nevertheless require a
-		 * zero register offset.
-		 */
-		if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM)
-			return 0;
-		break;
+		return 0;
 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
 	 * fixed offset.
 	 */
@@ -6305,24 +6326,16 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	case PTR_TO_BTF_ID | MEM_RCU:
 	case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
 		/* When referenced PTR_TO_BTF_ID is passed to release function,
-		 * it's fixed offset must be 0. In the other cases, fixed offset
-		 * can be non-zero.
+		 * its fixed offset must be 0. In the other cases, fixed offset
+		 * can be non-zero. This was already checked above. So pass
+		 * fixed_off_ok as true to allow fixed offset for all other
+		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
+		 * still need to do checks instead of returning.
*/ - if (arg_type_is_release(arg_type) && reg->off) { - verbose(env, "R%d must have zero offset when passed to release func\n", - regno); - return -EINVAL; - } - /* For arg is release pointer, fixed_off_ok must be false, but - * we already checked and rejected reg->off != 0 above, so set - * to true to allow fixed offset for all other cases. - */ - fixed_off_ok = true; - break; + return __check_ptr_off_reg(env, reg, regno, true); default: - break; + return __check_ptr_off_reg(env, reg, regno, false); } - return __check_ptr_off_reg(env, reg, regno, fixed_off_ok); } static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 3193915c5ee6..babcec123251 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 expected pointer to ctx, but got PTR", + .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c index 84838feba47f..92e3f6a61a79 100644 --- a/tools/testing/selftests/bpf/verifier/ringbuf.c +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -28,7 +28,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "dereference of modified ringbuf_mem ptr R1", + .errstr = "R1 must have zero offset when passed to release func", }, { "ringbuf: invalid reservation offset 2", -- cgit v1.2.3-58-ga151 From 292064cce7969787f13ed9988733aadc12fe0ca2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:41 +0530 Subject: selftests/bpf: Add test for dynptr reinit in user_ringbuf callback The original support for bpf_user_ringbuf_drain callbacks simply short-circuited checks for the dynptr state, allowing users to pass PTR_TO_DYNPTR (now CONST_PTR_TO_DYNPTR) to helpers that initialize a dynptr. This bug would have also surfaced with other dynptr helpers in the future that changed dynptr view or modified it in some way. Include test cases for all cases, i.e. both bpf_dynptr_from_mem and bpf_ringbuf_reserve_dynptr, and ensure verifier rejects both of them. Without the fix, both of these programs load and pass verification. While at it, remove sys_nanosleep target from failure cases' SEC definition, as there is no such tracepoint. 
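For reference, the rejected pattern boils down to reinitializing the dynptr
argument from inside the drain callback. A minimal sketch (illustrative only,
not the exact selftest code; the ring buffer map definitions are assumed):

	static long try_reinit(struct bpf_dynptr *dynptr, void *context)
	{
		/* dynptr is CONST_PTR_TO_DYNPTR here, fully owned by the
		 * kernel; handing it to an initializing helper must be
		 * rejected by the verifier.
		 */
		bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr);
		return 0;
	}

	SEC("?raw_tp/")
	int reinit_in_callback(void *ctx)
	{
		bpf_user_ringbuf_drain(&user_ringbuf, try_reinit, NULL, 0);
		return 0;
	}

With the fix, a program of this shape is rejected with "Dynptr has to be an
uninitialized dynptr".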
Acked-by: David Vernet Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207204141.308952-8-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/user_ringbuf.c | 2 + .../selftests/bpf/progs/user_ringbuf_fail.c | 51 ++++++++++++++++++---- 2 files changed, 45 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index aefa0a474e58..dae68de285b9 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -676,6 +676,8 @@ static struct { {"user_ringbuf_callback_discard_dynptr", "cannot release unowned const bpf_dynptr"}, {"user_ringbuf_callback_submit_dynptr", "cannot release unowned const bpf_dynptr"}, {"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"}, + {"user_ringbuf_callback_reinit_dynptr_mem", "Dynptr has to be an uninitialized dynptr"}, + {"user_ringbuf_callback_reinit_dynptr_ringbuf", "Dynptr has to be an uninitialized dynptr"}, }; #define SUCCESS_TEST(_func) { _func, #_func } diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 82aba4529aa9..f3201dc69a60 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -18,6 +18,13 @@ struct { __uint(type, BPF_MAP_TYPE_USER_RINGBUF); } user_ringbuf SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 2); +} ringbuf SEC(".maps"); + +static int map_value; + static long bad_access1(struct bpf_dynptr *dynptr, void *context) { @@ -32,7 +39,7 @@ bad_access1(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read before the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_bad_access1(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0); @@ -54,7 +61,7 @@ bad_access2(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_bad_access2(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0); @@ -73,7 +80,7 @@ write_forbidden(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_write_forbidden(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0); @@ -92,7 +99,7 @@ null_context_write(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_null_context_write(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0); @@ -113,7 +120,7 @@ null_context_read(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. 
*/ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_null_context_read(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0); @@ -132,7 +139,7 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_discard_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0); @@ -151,7 +158,7 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to read past the end of the pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_submit_dynptr(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0); @@ -168,10 +175,38 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should * not be able to write to that pointer. */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp/") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); return 0; } + +static long +try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context) +{ + bpf_dynptr_from_mem(&map_value, 4, 0, dynptr); + return 0; +} + +static long +try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context) +{ + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr); + return 0; +} + +SEC("?raw_tp/") +int user_ringbuf_callback_reinit_dynptr_mem(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0); + return 0; +} + +SEC("?raw_tp/") +int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0); + return 0; +} -- cgit v1.2.3-58-ga151 From db401875f438168c5804b295b93a28c7730bb57a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 7 Dec 2022 13:36:47 +0100 Subject: selftests: mlxsw: Move IPv6 decap_error test to shared directory Now that Spectrum-1 gained ip6gre support we can move the test out of the Spectrum-2 directory. Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh | 250 +++++++++++++++++++++ .../mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh | 250 --------------------- 2 files changed, 250 insertions(+), 250 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh delete mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh new file mode 100755 index 000000000000..878125041fc3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -0,0 +1,250 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. 
+ +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 2001:db8:1::2/64 | +# | | +# | + g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 2001:db8:10::1/64 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:10::2/64 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:10::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:10::2/64 +} + +switch_create() +{ + ip link set dev $swp1 up + __addr_add_del $swp1 add 2001:db8:1::2/64 + tc qdisc add dev $swp1 clsact + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit + ip link set dev g1 up + __addr_add_del g1 add 2001:db8:3::1/128 + + ip link set dev $rp1 up + __addr_add_del $rp1 add 2001:db8:10::1/64 +} + +switch_destroy() +{ + __addr_add_del $rp1 del 2001:db8:10::1/64 + ip link set dev $rp1 down + + __addr_add_del g1 del 2001:db8:3::1/128 + ip link set dev g1 down + tunnel_destroy g1 + + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 2001:db8:1::2/64 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ipip_payload_get() +{ + local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"86:dd:"$( : ETH protocol type + )"$key"$( : Key + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:00:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + ) + echo $p +} + +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \ + action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \ + tos=$outer_tos,next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? 
"Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=2001:db8:3::2 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 2001:db8:4::2 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 2001:db8:3::1/128 + tunnel_destroy g1 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit key 233 + __addr_add_del g1 add 2001:db8:3::1/128 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh deleted file mode 100755 index f62ce479c266..000000000000 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh +++ /dev/null @@ -1,250 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test devlink-trap tunnel exceptions functionality over mlxsw. -# Check all exception traps to make sure they are triggered under the right -# conditions. 
- -# +-------------------------+ -# | H1 | -# | $h1 + | -# | 2001:db8:1::1/64 | | -# +-------------------|-----+ -# | -# +-------------------|-----+ -# | SW1 | | -# | $swp1 + | -# | 2001:db8:1::2/64 | -# | | -# | + g1 (ip6gre) | -# | loc=2001:db8:3::1 | -# | rem=2001:db8:3::2 | -# | tos=inherit | -# | | -# | + $rp1 | -# | | 2001:db8:10::1/64 | -# +--|----------------------+ -# | -# +--|----------------------+ -# | | VRF2 | -# | + $rp2 | -# | 2001:db8:10::2/64 | -# +-------------------------+ - -lib_dir=$(dirname $0)/../../../../net/forwarding - -ALL_TESTS=" - decap_error_test -" - -NUM_NETIFS=4 -source $lib_dir/lib.sh -source $lib_dir/tc_common.sh -source $lib_dir/devlink_lib.sh - -h1_create() -{ - simple_if_init $h1 2001:db8:1::1/64 -} - -h1_destroy() -{ - simple_if_fini $h1 2001:db8:1::1/64 -} - -vrf2_create() -{ - simple_if_init $rp2 2001:db8:10::2/64 -} - -vrf2_destroy() -{ - simple_if_fini $rp2 2001:db8:10::2/64 -} - -switch_create() -{ - ip link set dev $swp1 up - __addr_add_del $swp1 add 2001:db8:1::2/64 - tc qdisc add dev $swp1 clsact - - tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ - ttl inherit - ip link set dev g1 up - __addr_add_del g1 add 2001:db8:3::1/128 - - ip link set dev $rp1 up - __addr_add_del $rp1 add 2001:db8:10::1/64 -} - -switch_destroy() -{ - __addr_add_del $rp1 del 2001:db8:10::1/64 - ip link set dev $rp1 down - - __addr_add_del g1 del 2001:db8:3::1/128 - ip link set dev g1 down - tunnel_destroy g1 - - tc qdisc del dev $swp1 clsact - __addr_add_del $swp1 del 2001:db8:1::2/64 - ip link set dev $swp1 down -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - swp1=${NETIFS[p2]} - - rp1=${NETIFS[p3]} - rp2=${NETIFS[p4]} - - forwarding_enable - vrf_prepare - h1_create - switch_create - vrf2_create -} - -cleanup() -{ - pre_cleanup - - vrf2_destroy - switch_destroy - h1_destroy - vrf_cleanup - forwarding_restore -} - -ipip_payload_get() -{ - local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01" - local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" - local flags=$1; shift - local key=$1; shift - - p=$(: - )"$flags"$( : GRE flags - )"0:00:"$( : Reserved + version - )"86:dd:"$( : ETH protocol type - )"$key"$( : Key - )"6"$( : IP version - )"0:0"$( : Traffic class - )"0:00:00:"$( : Flow label - )"00:00:"$( : Payload length - )"3a:"$( : Next header - )"04:"$( : Hop limit - )"$saddr:"$( : IP saddr - )"$daddr:"$( : IP daddr - ) - echo $p -} - -ecn_payload_get() -{ - echo $(ipip_payload_get "0") -} - -ecn_decap_test() -{ - local trap_name="decap_error" - local desc=$1; shift - local ecn_desc=$1; shift - local outer_tos=$1; shift - local mz_pid - - RET=0 - - tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ - flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \ - action pass - - rp1_mac=$(mac_get $rp1) - rp2_mac=$(mac_get $rp2) - payload=$(ecn_payload_get) - - ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ - -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \ - tos=$outer_tos,next=47,p=$payload -q & - mz_pid=$! - - devlink_trap_exception_test $trap_name - - tc_check_packets "dev $swp1 egress" 101 0 - check_err $? 
"Packets were not dropped" - - log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - - kill $mz_pid && wait $mz_pid &> /dev/null - tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower -} - -no_matching_tunnel_test() -{ - local trap_name="decap_error" - local desc=$1; shift - local sip=$1; shift - local mz_pid - - RET=0 - - tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ - flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass - - rp1_mac=$(mac_get $rp1) - rp2_mac=$(mac_get $rp2) - payload=$(ipip_payload_get "$@") - - ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ - -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q & - mz_pid=$! - - devlink_trap_exception_test $trap_name - - tc_check_packets "dev $swp1 egress" 101 0 - check_err $? "Packets were not dropped" - - log_test "$desc" - - kill $mz_pid && wait $mz_pid &> /dev/null - tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower -} - -decap_error_test() -{ - # Correct source IP - the remote address - local sip=2001:db8:3::2 - - ecn_decap_test "Decap error" "ECT(1)" 01 - ecn_decap_test "Decap error" "ECT(0)" 02 - ecn_decap_test "Decap error" "CE" 03 - - no_matching_tunnel_test "Decap error: Source IP check failed" \ - 2001:db8:4::2 "0" - no_matching_tunnel_test \ - "Decap error: Key exists but was not expected" $sip "2" \ - "00:00:00:E9:" - - # Destroy the tunnel and create new one with key - __addr_add_del g1 del 2001:db8:3::1/128 - tunnel_destroy g1 - - tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ - ttl inherit key 233 - __addr_add_del g1 add 2001:db8:3::1/128 - - no_matching_tunnel_test \ - "Decap error: Key does not exist but was expected" $sip "0" - no_matching_tunnel_test \ - "Decap error: Packet has a wrong key field" $sip "2" \ - "00:00:00:E8:" -} - -trap cleanup EXIT - -setup_prepare -setup_wait -tests_run - -exit $EXIT_STATUS -- cgit v1.2.3-58-ga151 From 17961a37ce4077ae43128ba5ce0b1dfd8f02fdb9 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 6 Dec 2022 11:28:38 +0100 Subject: selftests: net: Fix O=dir builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BPF Makefile in net/bpf did incorrect path substitution for O=dir builds, e.g. make O=/tmp/kselftest headers make O=/tmp/kselftest -C tools/testing/selftests would fail in selftest builds [1] net/ with clang-16: error: no such file or directory: 'kselftest/net/bpf/nat6to4.c' clang-16: error: no input files Add a pattern prerequisite and an order-only-prerequisite (for creating the directory), to resolve the issue. 
[1] https://lore.kernel.org/all/202212060009.34CkQmCN-lkp@intel.com/

Reported-by: kernel test robot
Fixes: 837a3d66d698 ("selftests: net: Add cross-compilation support for BPF programs")
Signed-off-by: Björn Töpel
Link: https://lore.kernel.org/r/20221206102838.272584-1-bjorn@kernel.org
Signed-off-by: Jakub Kicinski
---
 tools/testing/selftests/net/bpf/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
index a26cb94354f6..4abaf16d2077 100644
--- a/tools/testing/selftests/net/bpf/Makefile
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -12,7 +12,7 @@ CCINCLUDE += -I$(SCRATCH_DIR)/include
 
 BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
 
-MAKE_DIRS := $(BUILD_DIR)/libbpf
+MAKE_DIRS := $(BUILD_DIR)/libbpf $(OUTPUT)/bpf
 $(MAKE_DIRS):
 	mkdir -p $@
 
@@ -37,8 +37,8 @@ endif
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
 
-$(TEST_CUSTOM_PROGS): $(BPFOBJ)
-	$(CLANG) -O2 -target bpf -c $(@:.o=.c) $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+$(TEST_CUSTOM_PROGS): $(OUTPUT)/%.o: %.c $(BPFOBJ) | $(MAKE_DIRS)
+	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
 
 $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
 	   $(APIDIR)/linux/bpf.h \
-- cgit v1.2.3-58-ga151

From cb578c1c9cf60275f258b5af7929c2a54a188f66 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Fri, 9 Dec 2022 15:57:28 +0200
Subject: selftests/bpf: test cases for regsafe() bug skipping check_id()

Under certain conditions it was possible for verifier.c:regsafe() to skip
a check_id() call. This commit adds negative test cases previously
erroneously accepted as safe.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20221209135733.28851-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 .../selftests/bpf/verifier/direct_packet_access.c  | 54 ++++++++++++++++++++++
 .../testing/selftests/bpf/verifier/value_or_null.c | 49 ++++++++++++++++++++
 2 files changed, 103 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index 11acd1855acf..dce2e28aeb43 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -654,3 +654,57 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
+{
+	"direct packet access: test30 (check_id() in regsafe(), bad access)",
+	.insns = {
+	/* r9 = ctx */
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+	/* r7 = ktime_get_ns() */
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	/* r6 = ktime_get_ns() */
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	/* r2 = ctx->data
+	 * r3 = ctx->data
+	 * r4 = ctx->data_end
+	 */
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9, offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9, offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_9, offsetof(struct __sk_buff, data_end)),
+	/* if r6 > 100 goto exit
+	 * if r7 > 100 goto exit
+	 */
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 100, 9),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 100, 8),
+	/* r2 += r6              ; this forces assignment of ID to r2
+	 * r2 += 1               ; get some fixed off for r2
+	 * r3 += r7              ; this forces assignment of ID to r3
+	 * r3 += 1               ; get some fixed off for r3
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
+	/* if r6 != r7 goto +1   ; no new information about the state is derived from
+	 *                       ; this check, thus produced verifier states differ
+	 *                       ; only in 'insn_idx'
+	 * r2 = r3               ; optionally share ID between r2 and r3
+	 */
+	BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+	/* if r3 > ctx->data_end goto exit */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 1),
+	/* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2,
+	 *                       ; this is not always safe
+	 */
+	BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, -1),
+	/* exit(0) */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.flags = BPF_F_TEST_STATE_FREQ,
+	.result = REJECT,
+	.errstr = "invalid access to packet, off=0 size=1, R2",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
index 3ecb70a3d939..52a8bca14f03 100644
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ b/tools/testing/selftests/bpf/verifier/value_or_null.c
@@ -169,3 +169,52 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = ACCEPT,
 },
+{
+	"MAP_VALUE_OR_NULL check_ids() in regsafe()",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* r9 = map_lookup_elem(...) */
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+	/* r8 = map_lookup_elem(...) */
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+	/* r7 = ktime_get_ns() */
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	/* r6 = ktime_get_ns() */
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	/* if r6 > r7 goto +1    ; no new information about the state is derived from
+	 *                       ; this check, thus produced verifier states differ
+	 *                       ; only in 'insn_idx'
+	 * r9 = r8               ; optionally share ID between r9 and r8
+	 */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+	/* if r9 == 0 goto <exit> */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
+	/* read map value via r8, this is not always
+	 * safe because r8 might not be equal to r9.
+	 */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
+	/* exit 0 */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.flags = BPF_F_TEST_STATE_FREQ,
+	.fixup_map_hash_8b = { 3, 9 },
+	.result = REJECT,
+	.errstr = "R8 invalid mem access 'map_value_or_null'",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
-- cgit v1.2.3-58-ga151

From 7d05794330877986f605c1618534d7478030f5b8 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Fri, 9 Dec 2022 15:57:30 +0200
Subject: selftests/bpf: verify states_equal() maintains idmap across all frames

Add a test case that would erroneously pass verification if
verifier.c:states_equal() maintained separate register ID mappings for
different call frames.
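For context, check_ids() establishes a pairing between old and current IDs
through a single scratch table; the property exercised here is that one such
table must be shared by all frames of a state. A simplified sketch of the
logic (not the verifier's exact code):

	static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
	{
		unsigned int i;

		for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
			if (!idmap[i].old) {
				/* Empty slot: record the new pairing. */
				idmap[i].old = old_id;
				idmap[i].cur = cur_id;
				return true;
			}
			if (idmap[i].old == old_id)
				return idmap[i].cur == cur_id;
		}
		/* Out of slots: conservatively treat the states as different. */
		return false;
	}

If each frame rebuilt its own idmap, the pairing recorded for
frame[0].fp[-24].id could not constrain r9.id in the callee, which is what
the test below relies on.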
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20221209135733.28851-5-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/verifier/calls.c | 82 ++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index babcec123251..9d993926bf0e 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -2305,3 +2305,85 @@
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
+/* Make sure that verifier.c:states_equal() considers IDs from all
+ * frames when building 'idmap' for check_ids().
+ */
+{
+	"calls: check_ids() across call boundary",
+	.insns = {
+	/* Function main() */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24),
+	/* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1,
+		      0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32),
+	/* call foo(&fp[-24], &fp[-32])   ; both arguments have IDs in the current
+	 *                                ; stack frame
+	 */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+	BPF_CALL_REL(2),
+	/* exit 0 */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* Function foo()
+	 *
+	 * r9 = &frame[0].fp[-24]  ; save arguments in the callee saved registers,
+	 * r8 = &frame[0].fp[-32]  ; arguments are pointers to pointers to map value
+	 */
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+	BPF_MOV64_REG(BPF_REG_8, BPF_REG_2),
+	/* r7 = ktime_get_ns() */
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	/* r6 = ktime_get_ns() */
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	/* if r6 > r7 goto +1     ; no new information about the state is derived from
+	 *                        ; this check, thus produced verifier states differ
+	 *                        ; only in 'insn_idx'
+	 * r9 = r8
+	 */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+	/* r9 = *r9               ; verifier gets to this point via two paths:
+	 *                        ; (I) one including r9 = r8, verified first;
+	 *                        ; (II) one excluding r9 = r8, verified next.
+	 *                        ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
+	 *                        ; Suppose that checkpoint is created here via path (I).
+	 *                        ; When verifying via (II) the r9.id must be compared against
+	 *                        ; frame[0].fp[-24].id, otherwise (I) and (II) would be
+	 *                        ; incorrectly deemed equivalent.
+	 * if r9 == 0 goto <exit>
+	 */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
+	/* r8 = *r8               ; read map value via r8, this is not safe
+	 * r0 = *r8               ; because r8 might not be equal to r9.
+	 */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
+	/* exit 0 */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.flags = BPF_F_TEST_STATE_FREQ,
+	.fixup_map_hash_8b = { 3, 9 },
+	.result = REJECT,
+	.errstr = "R8 invalid mem access 'map_value_or_null'",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
-- cgit v1.2.3-58-ga151

From 2026f2062df860e5d282ffd4962ea5d5ed53dc51 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi
Date: Fri, 9 Dec 2022 15:57:32 +0200
Subject: selftests/bpf: Add pruning test case for bpf_spin_lock

Test that when reg->id is not the same for the same register of type
PTR_TO_MAP_VALUE between the current and old explored state, we currently
return false from regsafe() and continue exploring. Without the fix in the
prior commit, the test case fails.

Signed-off-by: Kumar Kartikeya Dwivedi
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20221209135733.28851-7-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/verifier/spin_lock.c | 39 ++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c
index 781621facae4..0a8dcfc37fc6 100644
--- a/tools/testing/selftests/bpf/verifier/spin_lock.c
+++ b/tools/testing/selftests/bpf/verifier/spin_lock.c
@@ -331,3 +331,42 @@
 	.errstr = "inside bpf_spin_lock",
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
+{
+	"spin_lock: regsafe compare reg->id for map value",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_6, offsetof(struct __sk_buff, mark)),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 2 },
+	.result = REJECT,
+	.errstr = "bpf_spin_unlock of different lock",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.flags = BPF_F_TEST_STATE_FREQ,
+},
-- cgit v1.2.3-58-ga151

From efd6286ff74a2fa2b45ed070d344cc0822b8ea6e Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Fri, 9 Dec 2022 15:57:33 +0200
Subject: selftests/bpf: test case for relaxed pruning of active_lock.id

Check that verifier.c:states_equal() uses check_ids() to match consistent
active_lock/map_value configurations. This allows pruning states with
active spin locks even if the numerical values of the active_lock ids do
not match across the compared states.
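Roughly, the comparison under test looks as follows (a simplified sketch of
the states_equal() logic, not verbatim):

	if (old->active_lock.ptr != cur->active_lock.ptr)
		return false;
	if (old->active_lock.id &&
	    !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
		return false;

Numeric equality of the ids is not required, only a consistent id mapping,
which is why the second visit to the unlock instruction can be pruned.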
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20221209135733.28851-8-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/spin_lock.c | 75 ++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c index 0a8dcfc37fc6..eaf114f07e2e 100644 --- a/tools/testing/selftests/bpf/verifier/spin_lock.c +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -370,3 +370,78 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .flags = BPF_F_TEST_STATE_FREQ, }, +/* Make sure that regsafe() compares ids for spin lock records using + * check_ids(): + * 1: r9 = map_lookup_elem(...) ; r9.id == 1 + * 2: r8 = map_lookup_elem(...) ; r8.id == 2 + * 3: r7 = ktime_get_ns() + * 4: r6 = ktime_get_ns() + * 5: if r6 > r7 goto <9> + * 6: spin_lock(r8) + * 7: r9 = r8 + * 8: goto <10> + * 9: spin_lock(r9) + * 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active, + * ; second visit to (10) should be considered safe + * ; if check_ids() is used. + * 11: exit(0) + */ +{ + "spin_lock: regsafe() check_ids() similar id mappings", + .insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0), + /* r9 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 24), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + /* r8 = map_lookup_elem(...) */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, + 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 18), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + /* r7 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if r6 > r7 goto +5 ; no new information about the state is derived from + * ; this check, thus produced verifier states differ + * ; only in 'insn_idx' + * spin_lock(r8) + * r9 = r8 + * goto unlock + */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), + BPF_JMP_A(3), + /* spin_lock(r9) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + /* spin_unlock(r9) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4), + BPF_EMIT_CALL(BPF_FUNC_spin_unlock), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3, 10 }, + .result = VERBOSE_ACCEPT, + .errstr = "28: safe", + .result_unpriv = REJECT, + .errstr_unpriv = "", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = BPF_F_TEST_STATE_FREQ, +}, -- cgit v1.2.3-58-ga151 From f9923a67ab623723663e8ba3eadf00a5bac82d6e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 10 Dec 2022 16:56:32 +0200 Subject: selftests: forwarding: Rename bridge_mdb test The test is only concerned with host MDB entries and not with MDB entries as a whole. Rename the test to reflect that. Subsequent patches will add a more general test that will contain the test cases for host MDB entries and remove the current test. 
Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 2 +- .../testing/selftests/net/forwarding/bridge_mdb.sh | 103 --------------------- .../selftests/net/forwarding/bridge_mdb_host.sh | 103 +++++++++++++++++++++ 3 files changed, 104 insertions(+), 104 deletions(-) delete mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb.sh create mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb_host.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index a9c5c1be5088..f2df81ca3179 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -2,7 +2,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ - bridge_mdb.sh \ + bridge_mdb_host.sh \ bridge_mdb_port_down.sh \ bridge_mld.sh \ bridge_port_isolation.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh deleted file mode 100755 index b1ba6876dd86..000000000000 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Verify that adding host mdb entries work as intended for all types of -# multicast filters: ipv4, ipv6, and mac - -ALL_TESTS="mdb_add_del_test" -NUM_NETIFS=2 - -TEST_GROUP_IP4="225.1.2.3" -TEST_GROUP_IP6="ff02::42" -TEST_GROUP_MAC="01:00:01:c0:ff:ee" - -source lib.sh - -h1_create() -{ - simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 -} - -h1_destroy() -{ - simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 -} - -switch_create() -{ - # Enable multicast filtering - ip link add dev br0 type bridge mcast_snooping 1 - - ip link set dev $swp1 master br0 - - ip link set dev br0 up - ip link set dev $swp1 up -} - -switch_destroy() -{ - ip link set dev $swp1 down - ip link del dev br0 -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - swp1=${NETIFS[p2]} - - vrf_prepare - - h1_create - switch_create -} - -cleanup() -{ - pre_cleanup - - switch_destroy - h1_destroy - - vrf_cleanup -} - -do_mdb_add_del() -{ - local group=$1 - local flag=$2 - - RET=0 - bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null - check_err $? "Failed adding $group to br0, port br0" - - if [ -z "$flag" ]; then - flag="temp" - fi - - bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null - check_err $? "$group not added with $flag flag" - - bridge mdb del dev br0 port br0 grp $group 2>/dev/null - check_err $? "Failed deleting $group from br0, port br0" - - bridge mdb show dev br0 | grep -q $group >/dev/null - check_err_fail 1 $? 
"$group still in mdb after delete" - - log_test "MDB add/del group $group to bridge port br0" -} - -mdb_add_del_test() -{ - do_mdb_add_del $TEST_GROUP_MAC permanent - do_mdb_add_del $TEST_GROUP_IP4 - do_mdb_add_del $TEST_GROUP_IP6 -} - -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh new file mode 100755 index 000000000000..b1ba6876dd86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that adding host mdb entries work as intended for all types of +# multicast filters: ipv4, ipv6, and mac + +ALL_TESTS="mdb_add_del_test" +NUM_NETIFS=2 + +TEST_GROUP_IP4="225.1.2.3" +TEST_GROUP_IP6="ff02::42" +TEST_GROUP_MAC="01:00:01:c0:ff:ee" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 + + ip link set dev $swp1 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +do_mdb_add_del() +{ + local group=$1 + local flag=$2 + + RET=0 + bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null + check_err $? "Failed adding $group to br0, port br0" + + if [ -z "$flag" ]; then + flag="temp" + fi + + bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null + check_err $? "$group not added with $flag flag" + + bridge mdb del dev br0 port br0 grp $group 2>/dev/null + check_err $? "Failed deleting $group from br0, port br0" + + bridge mdb show dev br0 | grep -q $group >/dev/null + check_err_fail 1 $? "$group still in mdb after delete" + + log_test "MDB add/del group $group to bridge port br0" +} + +mdb_add_del_test() +{ + do_mdb_add_del $TEST_GROUP_MAC permanent + do_mdb_add_del $TEST_GROUP_IP4 + do_mdb_add_del $TEST_GROUP_IP6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3-58-ga151 From b6d00da08610e2b17412330d7cfc7c04a3b6570d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 10 Dec 2022 16:56:33 +0200 Subject: selftests: forwarding: Add bridge MDB test Add a selftests that includes the following test cases: 1. Configuration tests. Both valid and invalid configurations are tested across all entry types (e.g., L2, IPv4). 2. Forwarding tests. Both host and port group entries are tested across all entry types. 3. Interaction between user installed MDB entries and IGMP / MLD control packets. 
Example output:

 INFO: # Host entries configuration tests
 TEST: Common host entries configuration tests (IPv4)                 [ OK ]
 TEST: Common host entries configuration tests (IPv6)                 [ OK ]
 TEST: Common host entries configuration tests (L2)                   [ OK ]
 INFO: # Port group entries configuration tests - (*, G)
 TEST: Common port group entries configuration tests (IPv4 (*, G))    [ OK ]
 TEST: Common port group entries configuration tests (IPv6 (*, G))    [ OK ]
 TEST: IPv4 (*, G) port group entries configuration tests             [ OK ]
 TEST: IPv6 (*, G) port group entries configuration tests             [ OK ]
 INFO: # Port group entries configuration tests - (S, G)
 TEST: Common port group entries configuration tests (IPv4 (S, G))    [ OK ]
 TEST: Common port group entries configuration tests (IPv6 (S, G))    [ OK ]
 TEST: IPv4 (S, G) port group entries configuration tests             [ OK ]
 TEST: IPv6 (S, G) port group entries configuration tests             [ OK ]
 INFO: # Port group entries configuration tests - L2
 TEST: Common port group entries configuration tests (L2 (*, G))      [ OK ]
 TEST: L2 (*, G) port group entries configuration tests               [ OK ]
 INFO: # Forwarding tests
 TEST: IPv4 host entries forwarding tests                             [ OK ]
 TEST: IPv6 host entries forwarding tests                             [ OK ]
 TEST: L2 host entries forwarding tests                               [ OK ]
 TEST: IPv4 port group "exclude" entries forwarding tests             [ OK ]
 TEST: IPv6 port group "exclude" entries forwarding tests             [ OK ]
 TEST: IPv4 port group "include" entries forwarding tests             [ OK ]
 TEST: IPv6 port group "include" entries forwarding tests             [ OK ]
 TEST: L2 port entries forwarding tests                               [ OK ]
 INFO: # Control packets tests
 TEST: IGMPv3 MODE_IS_INCLUDE tests                                   [ OK ]
 TEST: MLDv2 MODE_IS_INCLUDE tests                                    [ OK ]

Signed-off-by: Ido Schimmel
Acked-by: Nikolay Aleksandrov
Signed-off-by: Jakub Kicinski
---
 tools/testing/selftests/net/forwarding/Makefile    |    1 +
 .../testing/selftests/net/forwarding/bridge_mdb.sh | 1164 ++++++++++++++++++++
 2 files changed, 1165 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb.sh
(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index f2df81ca3179..453ae006fbcf 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,6 +2,7 @@
 TEST_PROGS = bridge_igmp.sh \
 bridge_locked_port.sh \
+ bridge_mdb.sh \
 bridge_mdb_host.sh \
 bridge_mdb_port_down.sh \
 bridge_mld.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..2fa5973c0c28
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,1164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR0 (802.1q) + $swp2 | |
+# | | vid 10 vid 10 | |
+# | | vid 20 vid 20 | |
+# | | | |
+# | 
+-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + cfg_test + fwd_test + ctrl_test +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +switch_create() +{ + ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 + bridge vlan add vid 10 dev br0 self + bridge vlan add vid 20 dev br0 self + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + tc qdisc add dev br0 clsact + tc qdisc add dev $h2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $h2 clsact + tc qdisc del dev br0 clsact + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + bridge vlan del vid 20 dev br0 self + bridge vlan del vid 10 dev br0 self + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +cfg_test_host_common() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local state=$1; shift + local invalid_state=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port br0 grp $grp $state vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp" + check_err $? "Failed to add $name host entry" + + bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null + check_fail $? "Managed to replace $name host entry" + + bridge mdb del dev br0 port br0 grp $grp $state vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp" + check_fail $? "Failed to delete $name host entry" + + # Check error cases. + bridge mdb add dev br0 port br0 grp $grp $invalid_state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a $invalid_state state" + + bridge mdb add dev br0 port br0 grp $grp src $src $state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a source" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + filter_mode exclude &> /dev/null + check_fail $? "Managed to add $name host entry with a filter mode" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add $name host entry with a source list" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + proto 123 &> /dev/null + check_fail $? 
"Managed to add $name host entry with a protocol" + + log_test "Common host entries configuration tests ($name)" +} + +# Check configuration of host entries from all types. +cfg_test_host() +{ + echo + log_info "# Host entries configuration tests" + + cfg_test_host_common "IPv4" "239.1.1.1" "192.0.2.1" "temp" "permanent" + cfg_test_host_common "IPv6" "ff0e::1" "2001:db8:1::1" "temp" "permanent" + cfg_test_host_common "L2" "01:02:03:04:05:06" "00:00:00:00:00:01" \ + "permanent" "temp" +} + +cfg_test_port_common() +{ + local name=$1;shift + local grp_key=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_err $? "Failed to add $name entry" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_err $? "Failed to replace $name entry" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_fail $? "Failed to delete $name entry" + + # Check default protocol and replacement. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static" + check_err $? "$name entry not added with default \"static\" protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + proto 123 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123" + check_err $? "Failed to replace protocol of $name entry" + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + # Check behavior when VLAN is not specified. + bridge mdb add dev br0 port $swp1 $grp_key permanent + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_err $? "$name entry with VLAN 10 not added when VLAN was not specified" + bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + check_err $? "$name entry with VLAN 20 not added when VLAN was not specified" + + bridge mdb del dev br0 port $swp1 $grp_key permanent + bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified" + bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified" + + # Check behavior when bridge port is down. + ip link set dev $swp1 down + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + check_err $? "Failed to add $name permanent entry when bridge port is down" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 &> /dev/null + check_fail $? "Managed to add $name temporary entry when bridge port is down" + + ip link set dev $swp1 up + setup_wait_dev $swp1 + + # Check error cases. + ip link set dev br0 down + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name entry when bridge is down" + ip link set dev br0 up + + ip link set dev br0 type bridge mcast_snooping 0 + bridge mdb add dev br0 port $swp1 $grp_key permanent vid \ + 10 &> /dev/null + check_fail $? "Managed to add $name entry when multicast snooping is disabled" + ip link set dev br0 type bridge mcast_snooping 1 + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 5000 \ + &> /dev/null + check_fail $? 
"Managed to add $name entry with an invalid VLAN" + + log_test "Common port group entries configuration tests ($name)" +} + +src_list_create() +{ + local src_prefix=$1; shift + local num_srcs=$1; shift + local src_list + local i + + for i in $(seq 1 $num_srcs); do + src_list=${src_list},${src_prefix}${i} + done + + echo $src_list | cut -c 2- +} + +__cfg_test_port_ip_star_g() +{ + local name=$1; shift + local grp=$1; shift + local invalid_grp=$1; shift + local src_prefix=$1; shift + local src1=${src_prefix}1 + local src2=${src_prefix}2 + local src3=${src_prefix}3 + local max_srcs=31 + local num_srcs + + RET=0 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude" + check_err $? "Default filter mode is not \"exclude\"" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check basic add and delete behavior. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + check_err $? "(*, G) entry not created" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry not created" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + check_fail $? "(*, G) entry not deleted" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_fail $? "(S, G) entry not deleted" + + ## State (permanent / temp) tests. + + # Check that group and source timer are not set for permanent entries. + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "permanent" + check_err $? "(*, G) entry not added as \"permanent\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "permanent" + check_err $? "(S, G) entry not added as \"permanent\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_err $? "(*, G) \"permanent\" entry has a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_err $? "\"permanent\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is set for temporary (*, G) EXCLUDE, but not + # the source timer. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should" + + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q " 0.00" + check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "\/0.00" + check_err $? "\"blocked\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is not set for temporary (*, G) INCLUDE, but + # that the source timer is set. 
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode include source_list $src1
+
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q "temp"
+ check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should"
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ grep -q "temp"
+ check_err $? "(S, G) entry not added as \"temp\" when should"
+
+ bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q " 0.00"
+ check_err $? "(*, G) INCLUDE entry has a pending group timer"
+ bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q "\/0.00"
+ check_fail $? "Source entry does not have a pending source timer"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that group timer is never set for (S, G) entries.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode include source_list $src1
+
+ bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ grep -q " 0.00"
+ check_err $? "(S, G) entry has a pending group timer"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Filter mode (include / exclude) tests.
+
+ # Check that (*, G) INCLUDE entries are added with correct filter mode
+ # and that (S, G) entries are not marked as "blocked".
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ filter_mode include source_list $src1
+
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q "include"
+ check_err $? "(*, G) INCLUDE not added with \"include\" filter mode"
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ grep -q "blocked"
+ check_fail $? "(S, G) entry marked as \"blocked\" when should not"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that (*, G) EXCLUDE entries are added with correct filter mode
+ # and that (S, G) entries are marked as "blocked".
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q "exclude"
+ check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode"
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ grep -q "blocked"
+ check_err $? "(S, G) entry not marked as \"blocked\" when should"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Protocol tests.
+
+ # Check that (*, G) and (S, G) entries are added with the specified
+ # protocol.
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ filter_mode exclude source_list $src1 proto zebra
+
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q "zebra"
+ check_err $? "(*, G) entry not added with \"zebra\" protocol"
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ grep -q "zebra"
+ check_err $? "(S, G) entry not added with \"zebra\" protocol"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Replace tests.
+
+ # Check that state can be modified.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \
+ filter_mode exclude source_list $src1
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
+ grep -q "permanent"
+ check_err $? "(*, G) entry not marked as \"permanent\" after replace"
+ bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ grep -q "permanent"
+ check_err $? 
"(S, G) entry not marked as \"permanent\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "temp" + check_err $? "(*, G) entry not marked as \"temp\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "temp" + check_err $? "(S, G) entry not marked as \"temp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that filter mode can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "include" + check_err $? "(*, G) not marked with \"include\" filter mode after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_fail $? "(S, G) marked as \"blocked\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "exclude" + check_err $? "(*, G) not marked with \"exclude\" filter mode after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "blocked" + check_err $? "(S, G) not marked as \"blocked\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that sources can be added to and removed from the source list. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src2,$src3 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry for source $src1 not created after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + check_err $? "(S, G) entry for source $src2 not created after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + check_err $? "(S, G) entry for source $src3 not created after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src3 + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + check_err $? "(S, G) entry for source $src1 not created after second replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + check_fail $? "(S, G) entry for source $src2 created after second replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + check_err $? "(S, G) entry for source $src3 not created after second replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that protocol can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto bgp + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ + grep -q "bgp" + check_err $? "(*, G) protocol not changed to \"bgp\" after replace" + bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + grep -q "bgp" + check_err $? 
"(S, G) protocol not changed to \"bgp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Star exclude tests. + + # Check star exclude functionality. When adding a new EXCLUDE (*, G), + # it needs to be also added to all (S, G) entries for proper + # replication. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode include source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \ + grep "$src1" | grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10 + + ## Error cases tests. + + bridge mdb add dev br0 port $swp1 grp $invalid_grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid group" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + &> /dev/null + check_fail $? "Managed to add an INCLUDE entry with an empty source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $grp &> /dev/null + check_fail $? "Managed to add an entry with an invalid source in source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list and no filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp src $src2 vid 10 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1,$src2,$src3 &> /dev/null + check_fail $? "Managed to add a source that already has a forwarding entry" + bridge mdb del dev br0 port $swp1 grp $grp src $src2 vid 10 + + # Check maximum number of sources. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $max_srcs) + num_srcs=$(bridge -d mdb show dev br0 vid 10 | grep "$grp" | \ + grep "src" | wc -l) + [[ $num_srcs -eq $max_srcs ]] + check_err $? "Failed to configure maximum number of sources ($max_srcs)" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $((max_srcs + 1))) \ + &> /dev/null + check_fail $? "Managed to exceed maximum number of sources ($max_srcs)" + + log_test "$name (*, G) port group entries configuration tests" +} + +cfg_test_port_ip_star_g() +{ + echo + log_info "# Port group entries configuration tests - (*, G)" + + cfg_test_port_common "IPv4 (*, G)" "grp 239.1.1.1" + cfg_test_port_common "IPv6 (*, G)" "grp ff0e::1" + __cfg_test_port_ip_star_g "IPv4" "239.1.1.1" "224.0.0.1" "192.0.2." + __cfg_test_port_ip_star_g "IPv6" "ff0e::1" "ff02::1" "2001:db8:1::" +} + +__cfg_test_port_ip_sg() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local grp_key="grp $grp src $src" + + RET=0 + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include" + check_err $? 
"Default filter mode is not \"include\"" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that entries can be added as both permanent and temp and that + # group timer is set correctly. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "permanent" + check_err $? "Entry not added as \"permanent\" when should" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_err $? "\"permanent\" entry has a pending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "temp" + check_err $? "Entry not added as \"temp\" when should" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_fail $? "\"temp\" entry has an unpending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check error cases. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + bridge mdb add dev br0 port $swp1 grp $grp src $grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid source" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 temp + bridge mdb add dev br0 port $swp1 $grp_key vid 10 permanent &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that we can replace available attributes. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123 + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111 + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "111" + check_err $? "Failed to replace protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "permanent" + check_err $? "Entry not marked as \"permanent\" after replace" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_err $? "Entry has a pending group timer after replace" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp + bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "temp" + check_err $? "Entry not marked as \"temp\" after replace" + bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ + grep -q "0.00" + check_fail $? "Entry has an unpending group timer after replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check star exclude functionality. When adding a (S, G), all matching + # (*, G) ports need to be added to it. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \ + grep -q "added_by_star_ex" + check_err $? 
"\"added_by_star_ex\" entry not created after adding (S, G) entry" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + log_test "$name (S, G) port group entries configuration tests" +} + +cfg_test_port_ip_sg() +{ + echo + log_info "# Port group entries configuration tests - (S, G)" + + cfg_test_port_common "IPv4 (S, G)" "grp 239.1.1.1 src 192.0.2.1" + cfg_test_port_common "IPv6 (S, G)" "grp ff0e::1 src 2001:db8:1::1" + __cfg_test_port_ip_sg "IPv4" "239.1.1.1" "192.0.2.1" + __cfg_test_port_ip_sg "IPv6" "ff0e::1" "2001:db8:1::1" +} + +cfg_test_port_ip() +{ + cfg_test_port_ip_star_g + cfg_test_port_ip_sg +} + +__cfg_test_port_l2() +{ + local grp="01:02:03:04:05:06" + + RET=0 + + bridge meb add dev br0 port $swp grp 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with unicast MAC" + + bridge mdb add dev br0 port $swp grp $grp src 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with a source" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + source_list 00:01:02:03:04:05 &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + log_test "L2 (*, G) port group entries configuration tests" +} + +cfg_test_port_l2() +{ + echo + log_info "# Port group entries configuration tests - L2" + + cfg_test_port_common "L2 (*, G)" "grp 01:02:03:04:05:06" + __cfg_test_port_l2 +} + +# Check configuration of regular (port) entries of all types. +cfg_test_port() +{ + cfg_test_port_ip + cfg_test_port_l2 +} + +cfg_test() +{ + cfg_test_host + cfg_test_port +} + +__fwd_test_host_ip() +{ + local grp=$1; shift + local src=$1; shift + local mode=$1; shift + local name + local eth_type + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" + fi + + tc filter add dev br0 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp src_ip $src \ + action drop + + # Packet should only be flooded to multicast router ports when there is + # no matching MDB entry. The bridge is not configured as a multicast + # router port. + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $grp temp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $grp temp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $grp vid 10 + $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? 
"Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + tc filter del dev br0 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name host entries forwarding tests" +} + +fwd_test_host_ip() +{ + __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4" + __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6" +} + +fwd_test_host_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev br0 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + # Packet should be flooded and locally received when there is no + # matching MDB entry. + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + + tc filter del dev br0 ingress protocol all pref 1 handle 1 flower + + log_test "L2 host entries forwarding tests" +} + +fwd_test_host() +{ + # Disable multicast router on the bridge to ensure that packets are + # only locally received when a matching host entry is present. + ip link set dev br0 type bridge mcast_router 0 + + fwd_test_host_ip + fwd_test_host_l2 + + ip link set dev br0 type bridge mcast_router 1 +} + +__fwd_test_port_ip() +{ + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mode=$1; shift + local filter_mode=$1; shift + local name + local eth_type + local src_list + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" + fi + + # The valid source is the one we expect to get packets from after + # adding the entry. + if [[ $filter_mode == "include" ]]; then + src_list=$valid_src + else + src_list=$invalid_src + fi + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $valid_src action drop + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 2 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $invalid_src action drop + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet from valid source received on H2 before adding entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? 
"Packet from invalid source received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode $filter_mode source_list $src_list + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet from valid source not received on H2 after adding entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 after adding entry" + + bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \ + filter_mode exclude + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source not received on H2 after allowing all sources" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source not received on H2 after allowing all sources" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source received on H2 after deleting entry" + + $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 2 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name port group \"$filter_mode\" entries forwarding tests" +} + +fwd_test_port_ip() +{ + __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude" + __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "exclude" + __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include" + __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "include" +} + +fwd_test_port_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev $h2 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet not received on H2 after adding entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol all pref 1 handle 1 flower + + log_test "L2 port entries forwarding tests" +} + +fwd_test_port() +{ + # Disable multicast flooding to ensure that packets are only forwarded + # out of a port when a matching port group entry is present. + bridge link set dev $swp2 mcast_flood off + + fwd_test_port_ip + fwd_test_port_l2 + + bridge link set dev $swp2 mcast_flood on +} + +fwd_test() +{ + echo + log_info "# Forwarding tests" + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. 
+ ip -6 address add fe80::1/64 nodad dev br0
+ ip link set dev br0 type bridge mcast_querier 1
+ # Wait the default Query Response Interval (10 seconds) for the bridge
+ # to determine that there are no other queriers in the network.
+ sleep 10
+
+ fwd_test_host
+ fwd_test_port
+
+ ip link set dev br0 type bridge mcast_querier 0
+ ip -6 address del fe80::1/64 dev br0
+}
+
+igmpv3_is_in_get()
+{
+ local igmpv3
+
+ igmpv3=$(:
+ )"22:"$( : Type - Membership Report
+ )"00:"$( : Reserved
+ )"2a:f8:"$( : Checksum
+ )"00:00:"$( : Reserved
+ )"00:01:"$( : Number of Group Records
+ )"01:"$( : Record Type - IS_IN
+ )"00:"$( : Aux Data Len
+ )"00:01:"$( : Number of Sources
+ )"ef:01:01:01:"$( : Multicast Address - 239.1.1.1
+ )"c0:00:02:02"$( : Source Address - 192.0.2.2
+ )
+
+ echo $igmpv3
+}
+
+ctrl_igmpv3_is_in_test()
+{
+ RET=0
+
+ # Add a permanent entry and check that it is not affected by the
+ # received IGMP packet.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 \
+ filter_mode include source_list 192.0.2.1
+
+ # IS_IN ( 192.0.2.2 )
+ $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ -t ip proto=2,p=$(igmpv3_is_in_get) -q
+
+ bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
+ check_fail $? "Permanent entry affected by IGMP packet"
+
+ # Replace the permanent entry with a temporary one and check that after
+ # processing the IGMP packet, a new source is added to the list along
+ # with a new forwarding entry.
+ bridge mdb replace dev br0 port $swp1 grp 239.1.1.1 temp vid 10 \
+ filter_mode include source_list 192.0.2.1
+
+ # IS_IN ( 192.0.2.2 )
+ $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ -t ip proto=2,p=$(igmpv3_is_in_get) -q
+
+ bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
+ grep -q 192.0.2.2
+ check_err $? "Source not added to source list"
+
+ bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \
+ grep -q "src 192.0.2.2"
+ check_err $? "(S, G) entry not created for new source"
+
+ bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
+
+ log_test "IGMPv3 MODE_IS_INCLUDE tests"
+}
+
+mldv2_is_in_get()
+{
+ local hbh
+ local icmpv6
+
+ hbh=$(:
+ )"3a:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )
+
+ icmpv6=$(:
+ )"8f:"$( : Type - MLDv2 Report
+ )"00:"$( : Code
+ )"45:39:"$( : Checksum
+ )"00:00:"$( : Reserved
+ )"00:01:"$( : Number of Group Records
+ )"01:"$( : Record Type - IS_IN
+ )"00:"$( : Aux Data Len
+ )"00:01:"$( : Number of Sources
+ )"ff:0e:00:00:00:00:00:00:"$( : Multicast address - ff0e::1
+ )"00:00:00:00:00:00:00:01:"$( :
+ )"20:01:0d:b8:00:01:00:00:"$( : Source Address - 2001:db8:1::2
+ )"00:00:00:00:00:00:00:02:"$( :
+ )
+
+ echo ${hbh}${icmpv6}
+}
+
+ctrl_mldv2_is_in_test()
+{
+ RET=0
+
+ # Add a permanent entry and check that it is not affected by the
+ # received MLD packet.
+ bridge mdb add dev br0 port $swp1 grp ff0e::1 permanent vid 10 \
+ filter_mode include source_list 2001:db8:1::1
+
+ # IS_IN ( 2001:db8:1::2 )
+ $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+
+ bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
+ grep -q 2001:db8:1::2
+ check_fail $? "Permanent entry affected by MLD packet"
+
+ # Replace the permanent entry with a temporary one and check that after
+ # processing the MLD packet, a new source is added to the list along
+ # with a new forwarding entry. 
+ bridge mdb replace dev br0 port $swp1 grp ff0e::1 temp vid 10 \
+ filter_mode include source_list 2001:db8:1::1
+
+ # IS_IN ( 2001:db8:1::2 )
+ $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ -t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+
+ bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
+ grep -q 2001:db8:1::2
+ check_err $? "Source not added to source list"
+
+ bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
+ grep -q "src 2001:db8:1::2"
+ check_err $? "(S, G) entry not created for new source"
+
+ bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10
+
+ log_test "MLDv2 MODE_IS_INCLUDE tests"
+}
+
+ctrl_test()
+{
+ echo
+ log_info "# Control packets tests"
+
+ ctrl_igmpv3_is_in_test
+ ctrl_mldv2_is_in_test
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- cgit v1.2.3-58-ga151