121 files changed, 5229 insertions, 920 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-btf b/Documentation/ABI/testing/sysfs-kernel-btf new file mode 100644 index 000000000000..2c9744b2cd59 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-btf @@ -0,0 +1,17 @@ +What: /sys/kernel/btf +Date: Aug 2019 +KernelVersion: 5.5 +Contact: bpf@vger.kernel.org +Description: + Contains BTF type information and related data for kernel and + kernel modules. + +What: /sys/kernel/btf/vmlinux +Date: Aug 2019 +KernelVersion: 5.5 +Contact: bpf@vger.kernel.org +Description: + Read-only binary attribute exposing kernel's own BTF type + information with description of all internal kernel types. See + Documentation/bpf/btf.rst for detailed description of format + itself. diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst index ed343abe541e..a78bf036cadd 100644 --- a/Documentation/bpf/prog_flow_dissector.rst +++ b/Documentation/bpf/prog_flow_dissector.rst @@ -26,6 +26,7 @@ The inputs are: * ``nhoff`` - initial offset of the networking header * ``thoff`` - initial offset of the transport header, initialized to nhoff * ``n_proto`` - L3 protocol type, parsed out of L2 header + * ``flags`` - optional flags Flow dissector BPF program should fill out the rest of the ``struct bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be @@ -101,6 +102,23 @@ can be called for both cases and would have to be written carefully to handle both cases. +Flags +===== + +``flow_keys->flags`` might contain optional input flags that work as follows: + +* ``BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG`` - tells BPF flow dissector to + continue parsing first fragment; the default expected behavior is that + flow dissector returns as soon as it finds out that the packet is fragmented; + used by ``eth_get_headlen`` to estimate length of all headers for GRO. +* ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to + stop parsing as soon as it reaches IPv6 flow label; used by + ``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash. +* ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop + parsing as soon as it reaches encapsulated headers; used by routing + infrastructure. 
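As a reference for the three flags above, here is a minimal sketch (not part of this patch; the SEC() annotation and helper usage follow the kernel selftests' conventions) of a flow dissector program that honours BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL; header parsing is elided:

SEC("flow_dissector")
int dissect(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	/* ... parse the IPv6 header at keys->nhoff, set keys->addr_proto,
	 * keys->ipv6_src/dst and keys->flow_label (elided) ... */

	/* ___skb_get_hash() only needs the flow label, so finish early */
	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
		return BPF_OK;

	/* ... otherwise continue to the transport header and ports ... */
	return BPF_OK;
}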
+ + Reference Implementation ======================== diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 18f4cc2c6acd..f9a506147c8a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -713,7 +713,7 @@ struct xdp_buff; struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -721,7 +721,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); -void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_flush(struct bpf_map *map); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -801,8 +800,10 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, return NULL; } -static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) +static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, + u32 key) { + return NULL; } static inline void __dev_map_flush(struct bpf_map *map) @@ -834,10 +835,6 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) return NULL; } -static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index) -{ -} - static inline void __cpu_map_flush(struct bpf_map *map) { } diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index eec5aeeeaf92..36a9c2325176 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -62,6 +62,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3aef8d82ea59..7eb28b72d9ba 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1271,7 +1271,7 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) struct bpf_flow_dissector; bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen); + __be16 proto, int nhoff, int hlen, unsigned int flags); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, diff --git a/include/net/tcp.h b/include/net/tcp.h index 9e9fbfaf052b..77fe87f7a992 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -415,6 +415,16 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb, const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* + * BPF SKB-less helpers + */ +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *th, u32 *cookie); +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie); +u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct tcphdr *th); +/* * TCP v4 functions exported for the inet6 API */ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 
68899fdc985b..8c8420230a10 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -175,7 +175,8 @@ struct _bpf_dtab_netdev { #endif /* __DEVMAP_OBJ_TYPE */ #define devmap_ifindex(fwd, map) \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((map->map_type == BPF_MAP_TYPE_DEVMAP || \ + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? \ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fa1c753dcdbc..4393bd4b2419 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -134,6 +134,7 @@ enum bpf_map_type { BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as @@ -2713,6 +2714,33 @@ union bpf_attr { * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2824,7 +2852,8 @@ union bpf_attr { FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3507,6 +3536,10 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@ -3528,6 +3561,8 @@ struct bpf_flow_keys { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; }; struct bpf_func_info { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 29d781061cd5..e1d9adb212f9 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o ifeq ($(CONFIG_INET),y) obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o endif +ifeq ($(CONFIG_SYSFS),y) +obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o +endif diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 0a00eaca6fae..6a6a154cfa7b 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -964,7 +964,6 @@ static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) return -ENOMEM; ctx->optval_end = ctx->optval + max_optlen; - ctx->optlen = max_optlen; return 0; } @@ -984,7 +983,7 @@ int 
__cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, .level = *level, .optname = *optname, }; - int ret; + int ret, max_optlen; /* Opportunistic check to see whether we have any BPF program * attached to the hook so we don't waste time allocating @@ -994,10 +993,18 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT)) return 0; - ret = sockopt_alloc_buf(&ctx, *optlen); + /* Allocate a bit more than the initial user buffer for + * BPF program. The canonical use case is overriding + * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic). + */ + max_optlen = max_t(int, 16, *optlen); + + ret = sockopt_alloc_buf(&ctx, max_optlen); if (ret) return ret; + ctx.optlen = *optlen; + if (copy_from_user(ctx.optval, optval, *optlen) != 0) { ret = -EFAULT; goto out; @@ -1016,7 +1023,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, if (ctx.optlen == -1) { /* optlen set to -1, bypass kernel */ ret = 1; - } else if (ctx.optlen > *optlen || ctx.optlen < -1) { + } else if (ctx.optlen > max_optlen || ctx.optlen < -1) { /* optlen is out of bounds */ ret = -EFAULT; } else { @@ -1063,6 +1070,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, if (ret) return ret; + ctx.optlen = max_optlen; + if (!retval) { /* If kernel getsockopt finished successfully, * copy whatever was returned to the user back diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d83cf8ccc872..9af048a932b5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -37,6 +37,12 @@ * notifier hook walks the map we know that new dev references can not be * added by the user because core infrastructure ensures dev_get_by_index() * calls will fail at this point. + * + * The devmap_hash type is a map type which interprets keys as ifindexes and + * indexes these using a hashmap. This allows maps that use ifindex as key to be + * densely packed instead of having holes in the lookup array for unused + * ifindexes. The setup and packet enqueue/send code is shared between the two + * types of devmap; only the lookup and insertion is different. 
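To illustrate the new map type described in the comment above, a hypothetical XDP sketch (not from this series) that keys redirects directly on ifindex; with a plain DEVMAP the map would instead have to be sized to the largest ifindex in use:

struct bpf_map_def SEC("maps") tx_ports = {
	.type        = BPF_MAP_TYPE_DEVMAP_HASH,
	.key_size    = sizeof(__u32),	/* ifindex */
	.value_size  = sizeof(__u32),	/* egress ifindex */
	.max_entries = 64,
};

SEC("xdp")
int xdp_tx_hairpin(struct xdp_md *ctx)
{
	/* redirect back out the receiving device; lookup, enqueue and
	 * flush behave exactly as for BPF_MAP_TYPE_DEVMAP */
	return bpf_redirect_map(&tx_ports, ctx->ingress_ifindex, 0);
}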
*/ #include <linux/bpf.h> #include <net/xdp.h> @@ -59,10 +65,11 @@ struct xdp_bulk_queue { struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ + struct hlist_node index_hlist; struct bpf_dtab *dtab; - unsigned int bit; struct xdp_bulk_queue __percpu *bulkq; struct rcu_head rcu; + unsigned int idx; /* keep track of map index for tracepoint */ }; struct bpf_dtab { @@ -70,33 +77,45 @@ struct bpf_dtab { struct bpf_dtab_netdev **netdev_map; struct list_head __percpu *flush_list; struct list_head list; + + /* these are only used for DEVMAP_HASH type maps */ + struct hlist_head *dev_index_head; + spinlock_t index_lock; + unsigned int items; + u32 n_buckets; }; static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); -static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +static struct hlist_head *dev_map_create_hash(unsigned int entries) +{ + int i; + struct hlist_head *hash; + + hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL); + if (hash != NULL) + for (i = 0; i < entries; i++) + INIT_HLIST_HEAD(&hash[i]); + + return hash; +} + +static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { - struct bpf_dtab *dtab; int err, cpu; u64 cost; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Lookup returns a pointer straight to dev->ifindex, so make sure the * verifier prevents writes from the BPF side */ attr->map_flags |= BPF_F_RDONLY_PROG; - dtab = kzalloc(sizeof(*dtab), GFP_USER); - if (!dtab) - return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&dtab->map, attr); @@ -104,12 +123,18 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); cost += sizeof(struct list_head) * num_possible_cpus(); + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); + + if (!dtab->n_buckets) /* Overflow check */ + return -EINVAL; + cost += sizeof(struct hlist_head) * dtab->n_buckets; + } + /* if map size is larger than memlock limit, reject it */ err = bpf_map_charge_init(&dtab->map.memory, cost); if (err) - goto free_dtab; - - err = -ENOMEM; + return -EINVAL; dtab->flush_list = alloc_percpu(struct list_head); if (!dtab->flush_list) @@ -124,19 +149,48 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (!dtab->netdev_map) goto free_percpu; - spin_lock(&dev_map_lock); - list_add_tail_rcu(&dtab->list, &dev_map_list); - spin_unlock(&dev_map_lock); + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); + if (!dtab->dev_index_head) + goto free_map_area; - return &dtab->map; + spin_lock_init(&dtab->index_lock); + } + return 0; + +free_map_area: + bpf_map_area_free(dtab->netdev_map); free_percpu: free_percpu(dtab->flush_list); free_charge: bpf_map_charge_finish(&dtab->map.memory); -free_dtab: - kfree(dtab); - return ERR_PTR(err); + return -ENOMEM; +} + +static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +{ + struct bpf_dtab *dtab; + int err; + + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + + dtab = kzalloc(sizeof(*dtab), GFP_USER); + if (!dtab) + return ERR_PTR(-ENOMEM); + + err = dev_map_init_map(dtab, attr); + if (err) { + kfree(dtab); + return ERR_PTR(err); + } + + spin_lock(&dev_map_lock); + 
list_add_tail_rcu(&dtab->list, &dev_map_list); + spin_unlock(&dev_map_lock); + + return &dtab->map; } static void dev_map_free(struct bpf_map *map) @@ -188,6 +242,7 @@ static void dev_map_free(struct bpf_map *map) free_percpu(dtab->flush_list); bpf_map_area_free(dtab->netdev_map); + kfree(dtab->dev_index_head); kfree(dtab); } @@ -208,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } +static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, + int idx) +{ + return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; +} + +struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct hlist_head *head = dev_map_index_hash(dtab, key); + struct bpf_dtab_netdev *dev; + + hlist_for_each_entry_rcu(dev, head, index_hlist) + if (dev->idx == key) + return dev; + + return NULL; +} + +static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u32 idx, *next = next_key; + struct bpf_dtab_netdev *dev, *next_dev; + struct hlist_head *head; + int i = 0; + + if (!key) + goto find_first; + + idx = *(u32 *)key; + + dev = __dev_map_hash_lookup_elem(map, idx); + if (!dev) + goto find_first; + + next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)), + struct bpf_dtab_netdev, index_hlist); + + if (next_dev) { + *next = next_dev->idx; + return 0; + } + + i = idx & (dtab->n_buckets - 1); + i++; + + find_first: + for (; i < dtab->n_buckets; i++) { + head = dev_map_index_hash(dtab, i); + + next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + struct bpf_dtab_netdev, + index_hlist); + if (next_dev) { + *next = next_dev->idx; + return 0; + } + } + + return -ENOENT; +} + static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, bool in_napi_ctx) { @@ -235,7 +354,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, out: bq->count = 0; - trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, + trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx, sent, drops, bq->dev_rx, dev, err); bq->dev_rx = NULL; __list_del_clearprev(&bq->flush_node); @@ -363,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key) return dev ? &dev->ifindex : NULL; } +static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, + *(u32 *)key); + struct net_device *dev = obj ? obj->dev : NULL; + + return dev ? 
&dev->ifindex : NULL; +} + static void dev_map_flush_old(struct bpf_dtab_netdev *dev) { if (dev->dev->netdev_ops->ndo_xdp_xmit) { @@ -412,17 +540,74 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - struct net *net = current->nsproxy->net_ns; + struct bpf_dtab_netdev *old_dev; + int k = *(u32 *)key; + unsigned long flags; + int ret = -ENOENT; + + spin_lock_irqsave(&dtab->index_lock, flags); + + old_dev = __dev_map_hash_lookup_elem(map, k); + if (old_dev) { + dtab->items--; + hlist_del_init_rcu(&old_dev->index_hlist); + call_rcu(&old_dev->rcu, __dev_map_entry_free); + ret = 0; + } + spin_unlock_irqrestore(&dtab->index_lock, flags); + + return ret; +} + +static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, + struct bpf_dtab *dtab, + u32 ifindex, + unsigned int idx) +{ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct bpf_dtab_netdev *dev; + struct xdp_bulk_queue *bq; + int cpu; + + dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), + sizeof(void *), gfp); + if (!dev->bulkq) { + kfree(dev); + return ERR_PTR(-ENOMEM); + } + + for_each_possible_cpu(cpu) { + bq = per_cpu_ptr(dev->bulkq, cpu); + bq->obj = dev; + } + + dev->dev = dev_get_by_index(net, ifindex); + if (!dev->dev) { + free_percpu(dev->bulkq); + kfree(dev); + return ERR_PTR(-EINVAL); + } + + dev->idx = idx; + dev->dtab = dtab; + + return dev; +} + +static int __dev_map_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; u32 ifindex = *(u32 *)value; - struct xdp_bulk_queue *bq; u32 i = *(u32 *)key; - int cpu; if (unlikely(map_flags > BPF_EXIST)) return -EINVAL; @@ -434,31 +619,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, if (!ifindex) { dev = NULL; } else { - dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node); - if (!dev) - return -ENOMEM; - - dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), - sizeof(void *), gfp); - if (!dev->bulkq) { - kfree(dev); - return -ENOMEM; - } - - for_each_possible_cpu(cpu) { - bq = per_cpu_ptr(dev->bulkq, cpu); - bq->obj = dev; - } - - dev->dev = dev_get_by_index(net, ifindex); - if (!dev->dev) { - free_percpu(dev->bulkq); - kfree(dev); - return -EINVAL; - } - - dev->bit = i; - dev->dtab = dtab; + dev = __dev_map_alloc_node(net, dtab, ifindex, i); + if (IS_ERR(dev)) + return PTR_ERR(dev); } /* Use call_rcu() here to ensure rcu critical sections have completed @@ -472,6 +635,63 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, return 0; } +static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return __dev_map_update_elem(current->nsproxy->net_ns, + map, key, value, map_flags); +} + +static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev, *old_dev; + u32 ifindex = *(u32 *)value; + u32 idx = *(u32 *)key; + unsigned long flags; + + if (unlikely(map_flags > BPF_EXIST || !ifindex)) + return -EINVAL; + + old_dev = 
__dev_map_hash_lookup_elem(map, idx); + if (old_dev && (map_flags & BPF_NOEXIST)) + return -EEXIST; + + dev = __dev_map_alloc_node(net, dtab, ifindex, idx); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + spin_lock_irqsave(&dtab->index_lock, flags); + + if (old_dev) { + hlist_del_rcu(&old_dev->index_hlist); + } else { + if (dtab->items >= dtab->map.max_entries) { + spin_unlock_irqrestore(&dtab->index_lock, flags); + call_rcu(&dev->rcu, __dev_map_entry_free); + return -E2BIG; + } + dtab->items++; + } + + hlist_add_head_rcu(&dev->index_hlist, + dev_map_index_hash(dtab, idx)); + spin_unlock_irqrestore(&dtab->index_lock, flags); + + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + + return 0; +} + +static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return __dev_map_hash_update_elem(current->nsproxy->net_ns, + map, key, value, map_flags); +} + const struct bpf_map_ops dev_map_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -482,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = { .map_check_btf = map_check_no_btf, }; +const struct bpf_map_ops dev_map_hash_ops = { + .map_alloc = dev_map_alloc, + .map_free = dev_map_free, + .map_get_next_key = dev_map_hash_get_next_key, + .map_lookup_elem = dev_map_hash_lookup_elem, + .map_update_elem = dev_map_hash_update_elem, + .map_delete_elem = dev_map_hash_delete_elem, + .map_check_btf = map_check_no_btf, +}; + static int dev_map_notification(struct notifier_block *notifier, ulong event, void *ptr) { diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c new file mode 100644 index 000000000000..4659349fc795 --- /dev/null +++ b/kernel/bpf/sysfs_btf.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide kernel BTF information for introspection and use by eBPF tools. 
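The attribute registered below is an ordinary read-only binary file, so the blob can be consumed without any BPF machinery; a minimal userspace sketch (hypothetical, not part of this patch):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/btf/vmlinux", "rb");
	char buf[4096];
	size_t n;

	if (!f) {
		perror("/sys/kernel/btf/vmlinux");
		return 1;
	}
	/* stream the raw BTF out, e.g. to feed `bpftool btf dump` */
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);
	fclose(f);
	return 0;
}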
+ */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kobject.h> +#include <linux/init.h> +#include <linux/sysfs.h> + +/* See scripts/link-vmlinux.sh, gen_btf() func for details */ +extern char __weak _binary__btf_vmlinux_bin_start[]; +extern char __weak _binary__btf_vmlinux_bin_end[]; + +static ssize_t +btf_vmlinux_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + memcpy(buf, _binary__btf_vmlinux_bin_start + off, len); + return len; +} + +static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { + .attr = { .name = "vmlinux", .mode = 0444, }, + .read = btf_vmlinux_read, +}; + +static struct kobject *btf_kobj; + +static int __init btf_vmlinux_init(void) +{ + int err; + + if (!_binary__btf_vmlinux_bin_start) + return 0; + + btf_kobj = kobject_create_and_add("btf", kernel_kobj); + if (IS_ERR(btf_kobj)) { + err = PTR_ERR(btf_kobj); + btf_kobj = NULL; + return err; + } + + bin_attr_btf_vmlinux.size = _binary__btf_vmlinux_bin_end - + _binary__btf_vmlinux_bin_start; + + return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); +} + +subsys_initcall(btf_vmlinux_init); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c84d83f86141..10c0ff93f52b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, goto error; break; case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) goto error; @@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, break; case BPF_FUNC_redirect_map: if (map->map_type != BPF_MAP_TYPE_DEVMAP && + map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_XSKMAP) goto error; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 80e6f3a6864d..1153bbcdff72 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -377,6 +377,22 @@ out: return ret; } +static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) +{ + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags))) + return -EINVAL; + + /* flags is allowed */ + + if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) + + FIELD_SIZEOF(struct bpf_flow_keys, flags), + sizeof(struct bpf_flow_keys))) + return -EINVAL; + + return 0; +} + int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -384,9 +400,11 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; + struct bpf_flow_keys *user_ctx; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; const struct ethhdr *eth; + unsigned int flags = 0; u32 retval, duration; void *data; int ret; @@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; - if (size < ETH_HLEN) return -EINVAL; @@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (!repeat) repeat = 1; + user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys)); + if (IS_ERR(user_ctx)) { + kfree(data); + return PTR_ERR(user_ctx); + } + if (user_ctx) { + ret = 
verify_user_bpf_flow_keys(user_ctx); + if (ret) + goto out; + flags = user_ctx->flags; + } + ctx.flow_keys = &flow_keys; ctx.data = data; ctx.data_end = (__u8 *)data + size; @@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, - size); + size, flags); if (signal_pending(current)) { preempt_enable(); @@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, user_ctx, + sizeof(struct bpf_flow_keys)); out: + kfree(user_ctx); kfree(data); return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index 7878f918b8c0..0c1059cdad3d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, int err; switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: { + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: { struct bpf_dtab_netdev *dst = fwd; err = dev_map_enqueue(dst, xdp, dev_rx); @@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void) if (map) { switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: __dev_map_flush(map); break; case BPF_MAP_TYPE_CPUMAP: @@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_DEVMAP_HASH: + return __dev_map_hash_lookup_elem(map, index); case BPF_MAP_TYPE_CPUMAP: return __cpu_map_lookup_elem(map, index); case BPF_MAP_TYPE_XSKMAP: @@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { struct bpf_dtab_netdev *dst = fwd; err = dev_map_generic_redirect(dst, skb, xdp_prog); @@ -5850,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + struct tcphdr *, th, u32, th_len) +{ +#ifdef CONFIG_SYN_COOKIES + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + return -ENOENT; + + if (!th->syn || th->ack || th->fin || th->rst) + return -EINVAL; + + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). 
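A hedged fragment showing how a TC program might unpack the helper's combined return value as documented above (sk, iph and th are assumed to come from an earlier bpf_skc_lookup_tcp() and a bounds-checked header parse, both elided):

	__s64 val = bpf_tcp_gen_syncookie(sk, iph, sizeof(struct iphdr),
					  th, th->doff * 4);
	if (val < 0)
		return TC_ACT_OK;  /* e.g. -ENOENT: no SYN flood in progress */

	__u32 cookie = (__u32)val; /* low 32 bits: SYN cookie (sequence number) */
	__u16 mss    = val >> 32;  /* next 16 bits: MSS encoded in the cookie */

	/* ... build the SYN-ACK with seq = cookie and an MSS option of mss ... */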
+ */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && sk->sk_ipv6only) + return -EINVAL; + + mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); + break; + +#if IS_BUILTIN(CONFIG_IPV6) + case 6: + if (unlikely(iph_len < sizeof(struct ipv6hdr))) + return -EINVAL; + + if (sk->sk_family != AF_INET6) + return -EINVAL; + + mss = tcp_v6_get_syncookie(sk, iph, th, &cookie); + break; +#endif /* CONFIG_IPV6 */ + + default: + return -EPROTONOSUPPORT; + } + if (mss <= 0) + return -ENOENT; + + return cookie | ((u64)mss << 32); +#else + return -EOPNOTSUPP; +#endif /* CONFIG_SYN_COOKIES */ +} + +static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { + .func = bpf_tcp_gen_syncookie, + .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5999,6 +6073,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; default: return bpf_base_func_proto(func_id); } @@ -6019,6 +6095,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; @@ -6135,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6174,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6267,6 +6349,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3e6fedb57bc1..9741b593ea53 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -737,6 +737,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL, @@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { + 
key_tags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL, + target_container); + key_tags->flow_label = ntohl(flow_keys->flow_label); + } } bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen) + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; @@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != + (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != + (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != + (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); + flow_keys->flags = flags; + preempt_disable(); result = BPF_PROG_RUN(prog, ctx); preempt_enable(); @@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net, } ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, - hlen); + hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c21e8a22fb3b..706cbb3b2986 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th, #endif } +/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped + * value on success. + */ +static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +{ + const unsigned char *ptr = (const unsigned char *)(th + 1); + int length = (th->doff * 4) - sizeof(struct tcphdr); + u16 mss = 0; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return mss; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + if (length < 2) + return mss; + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return mss; + if (opsize > length) + return mss; /* fail on partial options */ + if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) { + u16 in_mss = get_unaligned_be16(ptr); + + if (in_mss) { + if (user_mss && user_mss < in_mss) + in_mss = user_mss; + mss = in_mss; + } + } + ptr += opsize - 2; + length -= opsize; + } + } + return mss; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -6422,9 +6465,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(const struct sock *sk, - const struct sk_buff *skb, - const char *proto) +static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; @@ -6444,7 +6485,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); + proto, sk->sk_num, msg); return want_cookie; } @@ -6466,6 +6507,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk, } } +/* If a SYN cookie is required and supported, returns a clamped MSS value to be + * used for SYN cookie generation. 
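To make the option walk in tcp_parse_mss_option() above concrete, a worked example (kind/length values per RFC 793: TCPOPT_MSS is 2, TCPOLEN_MSS is 4):

	/* A SYN advertising MSS 1460 carries the option bytes:
	 *
	 *   0x02        kind  = TCPOPT_MSS
	 *   0x04        len   = TCPOLEN_MSS (kind + len + 2 value bytes)
	 *   0x05 0xb4   value = 0x05b4 = 1460
	 *
	 * tcp_parse_mss_option() returns 1460, clamped down to user_mss if
	 * one is set; 0 means no valid MSS option was found.
	 */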
+ */ +u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct tcphdr *th) +{ + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + !inet_csk_reqsk_queue_is_full(sk)) + return 0; + + if (!tcp_syn_flood_action(sk, rsk_ops->slab_name)) + return 0; + + if (sk_acceptq_is_full(sk)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + return 0; + } + + mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss); + if (!mss) + mss = af_ops->mss_clamp; + + return mss; +} +EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss); + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6487,7 +6558,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, */ if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e0a372676329..fd394ad179a0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, th); + if (mss) { + *cookie = __cookie_v4_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. 
* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5da069e91cac..87f44d3250ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, th); + if (mss) { + *cookie = __cookie_v6_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 83de74ca729a..a0607969f8c0 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/idr.h> +#include <linux/highmem.h> #include "xdp_umem.h" #include "xsk_queue.h" @@ -164,6 +165,14 @@ void xdp_umem_clear_dev(struct xdp_umem *umem) umem->zc = false; } +static void xdp_umem_unmap_pages(struct xdp_umem *umem) +{ + unsigned int i; + + for (i = 0; i < umem->npgs; i++) + kunmap(umem->pgs[i]); +} + static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unsigned int i; @@ -207,6 +216,7 @@ static void xdp_umem_release(struct xdp_umem *umem) xsk_reuseq_destroy(umem); + xdp_umem_unmap_pages(umem); xdp_umem_unpin_pages(umem); kfree(umem->pages); @@ -369,7 +379,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) } for (i = 0; i < umem->npgs; i++) - umem->pages[i].addr = page_address(umem->pgs[i]); + umem->pages[i].addr = kmap(umem->pgs[i]); return 0; diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 2dd1d39b152a..8ee47699a870 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -18,9 +18,6 @@ #include <libbpf.h> #include "bpf_load.h" #include "perf-sys.h" -#include "trace_helpers.h" - -static int pmu_fd; static __u64 time_get_ns(void) { @@ -31,12 +28,12 @@ static __u64 time_get_ns(void) } static __u64 start_time; +static __u64 cnt; #define MAX_CNT 100000ll -static int print_bpf_output(void *data, int size) +static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) { - static __u64 cnt; struct { __u64 pid; __u64 cookie; @@ -45,7 +42,7 @@ static int print_bpf_output(void *data, int size) if (e->cookie != 0x12345678) { printf("BUG pid %llx cookie %llx sized %d\n", e->pid, e->cookie, size); - return LIBBPF_PERF_EVENT_ERROR; + return; } cnt++; @@ -53,30 +50,14 @@ static int print_bpf_output(void *data, int size) if (cnt == MAX_CNT) { printf("recv %lld events per sec\n", MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); - return LIBBPF_PERF_EVENT_DONE; + return; } - - return LIBBPF_PERF_EVENT_CONT; -} - -static void test_bpf_perf_event(void) -{ - struct perf_event_attr attr = { - .sample_type = PERF_SAMPLE_RAW, - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - }; - int key = 0; - - pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); - - assert(pmu_fd >= 0); - assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); - ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); } int main(int argc, char **argv) { + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb; char filename[256]; FILE *f; int ret; @@ -88,16 +69,20 @@ int main(int argc, char **argv) return 1; } - 
test_bpf_perf_event(); - - if (perf_event_mmap(pmu_fd) < 0) + pb_opts.sample_cb = print_bpf_output; + pb = perf_buffer__new(map_fd[0], 8, &pb_opts); + ret = libbpf_get_error(pb); + if (ret) { + printf("failed to setup perf_buffer: %d\n", ret); return 1; + } f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r"); (void) f; start_time = time_get_ns(); - ret = perf_event_poller(pmu_fd, print_bpf_output); + while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) { + } kill(0, SIGINT); return ret; } diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c index a7e94e7ff87d..701a30f258b1 100644 --- a/samples/bpf/xdp_fwd_kern.c +++ b/samples/bpf/xdp_fwd_kern.c @@ -23,7 +23,8 @@ #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) -struct bpf_map_def SEC("maps") tx_port = { +/* For TX-traffic redirect requires net_device ifindex to be in this devmap */ +struct bpf_map_def SEC("maps") xdp_tx_ports = { .type = BPF_MAP_TYPE_DEVMAP, .key_size = sizeof(int), .value_size = sizeof(int), @@ -102,14 +103,34 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) fib_params.ifindex = ctx->ingress_ifindex; rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); - - /* verify egress index has xdp support - * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with - * cannot pass map_type 14 into func bpf_map_lookup_elem#1: - * NOTE: without verification that egress index supports XDP - * forwarding packets are dropped. + /* + * Some rc (return codes) from bpf_fib_lookup() are important, + * to understand how this XDP-prog interacts with network stack. + * + * BPF_FIB_LKUP_RET_NO_NEIGH: + * Even if route lookup was a success, then the MAC-addresses are also + * needed. This is obtained from arp/neighbour table, but if table is + * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid + * doing ARP lookup directly from XDP, then send packet to normal + * network stack via XDP_PASS and expect it will do ARP resolution. + * + * BPF_FIB_LKUP_RET_FWD_DISABLED: + * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding + * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not + * enabled this on ingress device. */ - if (rc == 0) { + if (rc == BPF_FIB_LKUP_RET_SUCCESS) { + /* Verify egress index has been configured as TX-port. + * (Note: User can still have inserted an egress ifindex that + * doesn't support XDP xmit, which will result in packet drops). + * + * Note: lookup in devmap supported since 0cdbb4b09a0. 
+ * If not supported will fail with: + * cannot pass map_type 14 into func bpf_map_lookup_elem#1: + */ + if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex)) + return XDP_PASS; + if (h_proto == htons(ETH_P_IP)) ip_decrease_ttl(iph); else if (h_proto == htons(ETH_P_IPV6)) @@ -117,7 +138,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); memcpy(eth->h_source, fib_params.smac, ETH_ALEN); - return bpf_redirect_map(&tx_port, fib_params.ifindex, 0); + return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0); } return XDP_PASS; diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 5b46ee12c696..97ff1dad7669 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -27,14 +27,20 @@ #include "libbpf.h" #include <bpf/bpf.h> - -static int do_attach(int idx, int fd, const char *name) +static int do_attach(int idx, int prog_fd, int map_fd, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, fd, 0); - if (err < 0) + err = bpf_set_link_xdp_fd(idx, prog_fd, 0); + if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); + return err; + } + + /* Adding ifindex as a possible egress TX port */ + err = bpf_map_update_elem(map_fd, &idx, &idx, 0); + if (err) + printf("ERROR: failed using device %s as TX-port\n", name); return err; } @@ -47,6 +53,9 @@ static int do_detach(int idx, const char *name) if (err < 0) printf("ERROR: failed to detach program from %s\n", name); + /* TODO: Remember to cleanup map, when adding use of shared map + * bpf_map_delete_elem((map_fd, &idx); + */ return err; } @@ -67,10 +76,10 @@ int main(int argc, char **argv) }; const char *prog_name = "xdp_fwd"; struct bpf_program *prog; + int prog_fd, map_fd = -1; char filename[PATH_MAX]; struct bpf_object *obj; int opt, i, idx, err; - int prog_fd, map_fd; int attach = 1; int ret = 0; @@ -103,8 +112,14 @@ int main(int argc, char **argv) return 1; } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + err = bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd); + if (err) { + printf("Does kernel support devmap lookup?\n"); + /* If not, the error message will be: + * "cannot pass map_type 14 into func bpf_map_lookup_elem#1" + */ return 1; + } prog = bpf_object__find_program_by_title(obj, prog_name); prog_fd = bpf_program__fd(prog); @@ -113,16 +128,12 @@ int main(int argc, char **argv) return 1; } map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, - "tx_port")); + "xdp_tx_ports")); if (map_fd < 0) { printf("map not found: %s\n", strerror(map_fd)); return 1; } } - if (attach) { - for (i = 1; i < 64; ++i) - bpf_map_update_elem(map_fd, &i, &i, 0); - } for (i = optind; i < argc; ++i) { idx = if_nametoindex(argv[i]); @@ -138,7 +149,7 @@ int main(int argc, char **argv) if (err) ret = err; } else { - err = do_attach(idx, prog_fd, argv[i]); + err = do_attach(idx, prog_fd, map_fd, argv[i]); if (err) ret = err; } diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index dc66345a929a..3002714e3cd5 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -17,14 +17,13 @@ #include <linux/if_link.h> #include "perf-sys.h" -#include "trace_helpers.h" #define MAX_CPUS 128 -static int pmu_fds[MAX_CPUS], if_idx; -static struct perf_event_mmap_page *headers[MAX_CPUS]; +static int if_idx; static char *if_name; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static __u32 prog_id; +static struct perf_buffer *pb = NULL; static int 
do_attach(int idx, int fd, const char *name) { @@ -73,7 +72,7 @@ static int do_detach(int idx, const char *name) #define SAMPLE_SIZE 64 -static int print_bpf_output(void *data, int size) +static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) { struct { __u16 cookie; @@ -83,45 +82,20 @@ static int print_bpf_output(void *data, int size) int i; if (e->cookie != 0xdead) { - printf("BUG cookie %x sized %d\n", - e->cookie, size); - return LIBBPF_PERF_EVENT_ERROR; + printf("BUG cookie %x sized %d\n", e->cookie, size); + return; } printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len); for (i = 0; i < 14 && i < e->pkt_len; i++) printf("%02x ", e->pkt_data[i]); printf("\n"); - - return LIBBPF_PERF_EVENT_CONT; -} - -static void test_bpf_perf_event(int map_fd, int num) -{ - struct perf_event_attr attr = { - .sample_type = PERF_SAMPLE_RAW, - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - .wakeup_events = 1, /* get an fd notification for every event */ - }; - int i; - - for (i = 0; i < num; i++) { - int key = i; - - pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/, - -1/*group_fd*/, 0); - - assert(pmu_fds[i] >= 0); - assert(bpf_map_update_elem(map_fd, &key, - &pmu_fds[i], BPF_ANY) == 0); - ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0); - } } static void sig_handler(int signo) { do_detach(if_idx, if_name); + perf_buffer__free(pb); exit(0); } @@ -140,13 +114,13 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct perf_buffer_opts pb_opts = {}; const char *optstr = "F"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; - int ret, err, i; - int numcpus; + int ret, err; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -169,10 +143,6 @@ int main(int argc, char **argv) return 1; } - numcpus = get_nprocs(); - if (numcpus > MAX_CPUS) - numcpus = MAX_CPUS; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; @@ -211,14 +181,17 @@ int main(int argc, char **argv) return 1; } - test_bpf_perf_event(map_fd, numcpus); + pb_opts.sample_cb = print_bpf_output; + pb = perf_buffer__new(map_fd, 8, &pb_opts); + err = libbpf_get_error(pb); + if (err) { + perror("perf_buffer setup failed"); + return 1; + } - for (i = 0; i < numcpus; i++) - if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0) - return 1; + while ((ret = perf_buffer__poll(pb, 1000)) >= 0) { + } - ret = perf_event_poller_multi(pmu_fds, headers, numcpus, - print_bpf_output); kill(0, SIGINT); return ret; } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 915775eb2921..c31193340108 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -56,8 +56,8 @@ modpost_link() } # Link of vmlinux -# ${1} - optional extra .o files -# ${2} - output file +# ${1} - output file +# ${@:2} - optional extra .o files vmlinux_link() { local lds="${objtree}/${KBUILD_LDS}" @@ -70,9 +70,9 @@ vmlinux_link() --start-group \ ${KBUILD_VMLINUX_LIBS} \ --end-group \ - ${1}" + ${@:2}" - ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ + ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1} \ -T ${lds} ${objects} else objects="-Wl,--whole-archive \ @@ -81,9 +81,9 @@ vmlinux_link() -Wl,--start-group \ ${KBUILD_VMLINUX_LIBS} \ -Wl,--end-group \ - ${1}" + ${@:2}" - ${CC} ${CFLAGS_vmlinux} -o ${2} \ + ${CC} ${CFLAGS_vmlinux} -o ${1} \ -Wl,-T,${lds} \ ${objects} \ -lutil -lrt -lpthread @@ -92,23 +92,34 @@ vmlinux_link() } # generate .BTF 
typeinfo from DWARF debuginfo +# ${1} - vmlinux image +# ${2} - file to dump raw BTF data into gen_btf() { - local pahole_ver; + local pahole_ver + local bin_arch if ! [ -x "$(command -v ${PAHOLE})" ]; then info "BTF" "${1}: pahole (${PAHOLE}) is not available" - return 0 + return 1 fi pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') if [ "${pahole_ver}" -lt "113" ]; then info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" - return 0 + return 1 fi - info "BTF" ${1} + info "BTF" ${2} + vmlinux_link ${1} LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} + + # dump .BTF section into raw binary file to link with final vmlinux + bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \ + cut -d, -f1 | cut -d' ' -f2) + ${OBJCOPY} --dump-section .BTF=.btf.vmlinux.bin ${1} 2>/dev/null + ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \ + --rename-section .data=.BTF .btf.vmlinux.bin ${2} } # Create ${2} .o file with all symbols from the ${1} object file @@ -153,6 +164,7 @@ sortextable() # Delete output files in case of error cleanup() { + rm -f .btf.* rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_vmlinux* @@ -215,6 +227,13 @@ ${MAKE} -f "${srctree}/scripts/Makefile.modpost" MODPOST_VMLINUX=1 info MODINFO modules.builtin.modinfo ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo +btf_vmlinux_bin_o="" +if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then + if gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then + btf_vmlinux_bin_o=.btf.vmlinux.bin.o + fi +fi + kallsymso="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then @@ -246,11 +265,11 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then kallsyms_vmlinux=.tmp_vmlinux2 # step 1 - vmlinux_link "" .tmp_vmlinux1 + vmlinux_link .tmp_vmlinux1 ${btf_vmlinux_bin_o} kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o # step 2 - vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2 + vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_vmlinux_bin_o} kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o # step 3 @@ -261,18 +280,13 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then kallsymso=.tmp_kallsyms3.o kallsyms_vmlinux=.tmp_vmlinux3 - vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3 - + vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_vmlinux_bin_o} kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o fi fi info LD vmlinux -vmlinux_link "${kallsymso}" vmlinux - -if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then - gen_btf vmlinux -fi +vmlinux_link vmlinux "${kallsymso}" "${btf_vmlinux_bin_o}" if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then info SORTEX vmlinux diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 585f270c2d25..06a28b07787d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -20,8 +20,8 @@ SYNOPSIS CGROUP COMMANDS =============== -| **bpftool** **cgroup { show | list }** *CGROUP* -| **bpftool** **cgroup tree** [*CGROUP_ROOT*] +| **bpftool** **cgroup { show | list }** *CGROUP* [**effective**] +| **bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**] | **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] | **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* | **bpftool** **cgroup help** @@ -35,13 +35,17 @@ CGROUP COMMANDS DESCRIPTION =========== - **bpftool cgroup { show | list }** *CGROUP* + **bpftool cgroup { show | list }** *CGROUP* [**effective**] List all programs attached to the cgroup *CGROUP*. 
Output will start with program ID followed by attach type, attach flags and program name. - **bpftool cgroup tree** [*CGROUP_ROOT*] + If **effective** is specified retrieve effective programs that + will execute for events within a cgroup. This includes + inherited along with attached ones. + + **bpftool cgroup tree** [*CGROUP_ROOT*] [**effective**] Iterate over all cgroups in *CGROUP_ROOT* and list all attached programs. If *CGROUP_ROOT* is not specified, bpftool uses cgroup v2 mountpoint. @@ -50,6 +54,10 @@ DESCRIPTION commands: it starts with absolute cgroup path, followed by program ID, attach type, attach flags and program name. + If **effective** is specified retrieve effective programs that + will execute for events within a cgroup. This includes + inherited along with attached ones. + **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] Attach program *PROG* to the cgroup *CGROUP* with attach type *ATTACH_TYPE* and optional *ATTACH_FLAGS*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 490b4501cb6e..61d1d270eb5e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -46,7 +46,7 @@ MAP COMMANDS | *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** | | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** -| | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** +| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index a7afea4dec47..4c9d1ffc3fc7 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -52,14 +52,14 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -LIBS = -lelf $(LIBBPF) +LIBS = -lelf -lz $(LIBBPF) INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd disassembler-four-args reallocarray -FEATURE_DISPLAY = libbfd disassembler-four-args +FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib +FEATURE_DISPLAY = libbfd disassembler-four-args zlib check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -111,6 +111,8 @@ OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< +$(OUTPUT)feature.o: | zdep + $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) @@ -149,6 +151,9 @@ doc-uninstall: FORCE: -.PHONY: all FORCE clean install uninstall +zdep: + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi + +.PHONY: all FORCE clean install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index c8f42e1fcbc9..df16c5415444 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -489,8 +489,8 @@ _bpftool() perf_event_array percpu_hash percpu_array \ stack_trace cgroup_array lru_hash \ lru_percpu_hash lpm_trie array_of_maps \ - hash_of_maps devmap sockmap cpumap xskmap \ - sockhash cgroup_storage reuseport_sockarray \ + hash_of_maps devmap 
devmap_hash sockmap cpumap \ + xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack' -- \ "$cur" ) ) return 0 @@ -710,12 +710,15 @@ _bpftool() ;; cgroup) case $command in - show|list) - _filedir - return 0 - ;; - tree) - _filedir + show|list|tree) + case $cword in + 3) + _filedir + ;; + 4) + COMPREPLY=( $( compgen -W 'effective' -- "$cur" ) ) + ;; + esac return 0 ;; attach|detach) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index f3c05b08c68c..44352b5aca85 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -29,6 +29,8 @@ " recvmsg4 | recvmsg6 | sysctl |\n" \ " getsockopt | setsockopt }" +static unsigned int query_flags; + static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_INGRESS] = "ingress", [BPF_CGROUP_INET_EGRESS] = "egress", @@ -107,7 +109,8 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) __u32 prog_cnt = 0; int ret; - ret = bpf_prog_query(cgroup_fd, type, 0, NULL, NULL, &prog_cnt); + ret = bpf_prog_query(cgroup_fd, type, query_flags, NULL, + NULL, &prog_cnt); if (ret) return -1; @@ -125,8 +128,8 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int ret; prog_cnt = ARRAY_SIZE(prog_ids); - ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids, - &prog_cnt); + ret = bpf_prog_query(cgroup_fd, type, query_flags, &attach_flags, + prog_ids, &prog_cnt); if (ret) return ret; @@ -158,20 +161,34 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { enum bpf_attach_type type; + const char *path; int cgroup_fd; int ret = -1; - if (argc < 1) { - p_err("too few parameters for cgroup show"); - goto exit; - } else if (argc > 1) { - p_err("too many parameters for cgroup show"); - goto exit; + query_flags = 0; + + if (!REQ_ARGS(1)) + return -1; + path = GET_ARG(); + + while (argc) { + if (is_prefix(*argv, "effective")) { + if (query_flags & BPF_F_QUERY_EFFECTIVE) { + p_err("duplicated argument: %s", *argv); + return -1; + } + query_flags |= BPF_F_QUERY_EFFECTIVE; + NEXT_ARG(); + } else { + p_err("expected no more arguments, 'effective', got: '%s'?", + *argv); + return -1; + } } - cgroup_fd = open(argv[0], O_RDONLY); + cgroup_fd = open(path, O_RDONLY); if (cgroup_fd < 0) { - p_err("can't open cgroup %s", argv[0]); + p_err("can't open cgroup %s", path); goto exit; } @@ -294,26 +311,37 @@ static char *find_cgroup_root(void) static int do_show_tree(int argc, char **argv) { - char *cgroup_root; + char *cgroup_root, *cgroup_alloced = NULL; int ret; - switch (argc) { - case 0: - cgroup_root = find_cgroup_root(); - if (!cgroup_root) { + query_flags = 0; + + if (!argc) { + cgroup_alloced = find_cgroup_root(); + if (!cgroup_alloced) { p_err("cgroup v2 isn't mounted"); return -1; } - break; - case 1: - cgroup_root = argv[0]; - break; - default: - p_err("too many parameters for cgroup tree"); - return -1; + cgroup_root = cgroup_alloced; + } else { + cgroup_root = GET_ARG(); + + while (argc) { + if (is_prefix(*argv, "effective")) { + if (query_flags & BPF_F_QUERY_EFFECTIVE) { + p_err("duplicated argument: %s", *argv); + return -1; + } + query_flags |= BPF_F_QUERY_EFFECTIVE; + NEXT_ARG(); + } else { + p_err("expected no more arguments, 'effective', got: '%s'?", + *argv); + return -1; + } + } } - if (json_output) jsonw_start_array(json_wtr); else @@ -338,8 +366,7 @@ static int do_show_tree(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - if (argc == 0) - 
free(cgroup_root); + free(cgroup_alloced); return ret; } @@ -459,8 +486,8 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s { show | list } CGROUP\n" - " %s %s tree [CGROUP_ROOT]\n" + "Usage: %s %s { show | list } CGROUP [**effective**]\n" + " %s %s tree [CGROUP_ROOT] [**effective**]\n" " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" " %s %s detach CGROUP ATTACH_TYPE PROG\n" " %s %s help\n" diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index d672d9086fff..03bdc5b3ac49 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -14,6 +14,7 @@ #include <bpf.h> #include <libbpf.h> +#include <zlib.h> #include "main.h" @@ -284,34 +285,32 @@ static void probe_jit_limit(void) } } -static char *get_kernel_config_option(FILE *fd, const char *option) +static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n, + char **value) { - size_t line_n = 0, optlen = strlen(option); - char *res, *strval, *line = NULL; - ssize_t n; + char *sep; - rewind(fd); - while ((n = getline(&line, &line_n, fd)) > 0) { - if (strncmp(line, option, optlen)) + while (gzgets(file, buf, n)) { + if (strncmp(buf, "CONFIG_", 7)) continue; - /* Check we have at least '=', value, and '\n' */ - if (strlen(line) < optlen + 3) - continue; - if (*(line + optlen) != '=') + + sep = strchr(buf, '='); + if (!sep) continue; /* Trim ending '\n' */ - line[strlen(line) - 1] = '\0'; + buf[strlen(buf) - 1] = '\0'; + + /* Split on '=' and ensure that a value is present. */ + *sep = '\0'; + if (!sep[1]) + continue; - /* Copy and return config option value */ - strval = line + optlen + 1; - res = strdup(strval); - free(line); - return res; + *value = sep + 1; + return true; } - free(line); - return NULL; + return false; } static void probe_kernel_image_config(void) @@ -386,59 +385,61 @@ static void probe_kernel_image_config(void) /* test_bpf module for BPF tests */ "CONFIG_TEST_BPF", }; - char *value, *buf = NULL; + char *values[ARRAY_SIZE(options)] = { }; struct utsname utsn; char path[PATH_MAX]; - size_t i, n; - ssize_t ret; - FILE *fd; + gzFile file = NULL; + char buf[4096]; + char *value; + size_t i; - if (uname(&utsn)) - goto no_config; + if (!uname(&utsn)) { + snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); - snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); + /* gzopen also accepts uncompressed files. */ + file = gzopen(path, "r"); + } - fd = fopen(path, "r"); - if (!fd && errno == ENOENT) { - /* Some distributions put the config file at /proc/config, give - * it a try. - * Sometimes it is also at /proc/config.gz but we do not try - * this one for now, it would require linking against libz. + if (!file) { + /* Some distributions build with CONFIG_IKCONFIG=y and put the + * config file at /proc/config.gz. 
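A minimal standalone sketch of the gzopen()/gzgets() approach used above — assuming only zlib's documented API; the function name and probed path are illustrative, not part of the patch:

    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    /* Scan a (possibly gzip-compressed) kernel config for a single option.
     * gzopen() transparently reads uncompressed files too, which is why a
     * single code path now covers both /boot/config-$(uname -r) and
     * /proc/config.gz. Build with -lz.
     */
    static int find_config_option(const char *path, const char *opt)
    {
    	char buf[4096], *val;
    	size_t optlen = strlen(opt);
    	gzFile f = gzopen(path, "r");

    	if (!f)
    		return -1;
    	while (gzgets(f, buf, sizeof(buf))) {
    		if (strncmp(buf, opt, optlen) || buf[optlen] != '=')
    			continue;
    		val = buf + optlen + 1;
    		val[strcspn(val, "\n")] = '\0';	/* trim trailing newline */
    		printf("%s=%s\n", opt, val);
    		gzclose(f);
    		return 0;
    	}
    	gzclose(f);
    	return -1;
    }

On a typical build, find_config_option("/proc/config.gz", "CONFIG_BPF") would print CONFIG_BPF=y.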
*/ - fd = fopen("/proc/config", "r"); + file = gzopen("/proc/config.gz", "r"); } - if (!fd) { + if (!file) { p_info("skipping kernel config, can't open file: %s", strerror(errno)); - goto no_config; + goto end_parse; } /* Sanity checks */ - ret = getline(&buf, &n, fd); - ret = getline(&buf, &n, fd); - if (!buf || !ret) { + if (!gzgets(file, buf, sizeof(buf)) || + !gzgets(file, buf, sizeof(buf))) { p_info("skipping kernel config, can't read from file: %s", strerror(errno)); - free(buf); - goto no_config; + goto end_parse; } if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) { p_info("skipping kernel config, can't find correct file"); - free(buf); - goto no_config; + goto end_parse; } - free(buf); - for (i = 0; i < ARRAY_SIZE(options); i++) { - value = get_kernel_config_option(fd, options[i]); - print_kernel_option(options[i], value); - free(value); + while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) { + for (i = 0; i < ARRAY_SIZE(options); i++) { + if (values[i] || strcmp(buf, options[i])) + continue; + + values[i] = strdup(value); + } } - fclose(fd); - return; -no_config: - for (i = 0; i < ARRAY_SIZE(options); i++) - print_kernel_option(options[i], NULL); +end_parse: + if (file) + gzclose(file); + + for (i = 0; i < ARRAY_SIZE(options); i++) { + print_kernel_option(options[i], values[i]); + free(values[i]); + } } static bool probe_bpf_syscall(const char *define_prefix) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 5da5a7311f13..bfbbc6b4cb83 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -37,6 +37,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_CPUMAP] = "cpumap", [BPF_MAP_TYPE_XSKMAP] = "xskmap", @@ -1271,7 +1272,7 @@ static int do_help(int argc, char **argv) " TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n" " percpu_array | stack_trace | cgroup_array | lru_hash |\n" " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" - " devmap | sockmap | cpumap | xskmap | sockhash |\n" + " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4e455018da65..4393bd4b2419 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -134,6 +134,7 @@ enum bpf_map_type { BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as @@ -1571,8 +1572,11 @@ union bpf_attr { * but this is only implemented for native XDP (with driver * support) as of this writing). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. * * When used to redirect packets to net devices, this helper * provides a high performance increase over **bpf_redirect**\ (). @@ -2710,6 +2714,33 @@ union bpf_attr { * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. 
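The relaxed bpf_redirect_map() flags documented above pair naturally with the new BPF_MAP_TYPE_DEVMAP_HASH. A hedged XDP sketch — the map name and sizing are illustrative, and the SEC()/bpf_map_def conventions follow the classic libbpf style:

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    struct bpf_map_def SEC("maps") tx_ports = {
    	.type		= BPF_MAP_TYPE_DEVMAP_HASH,
    	.key_size	= sizeof(__u32),
    	.value_size	= sizeof(__u32),
    	.max_entries	= 64,
    };

    SEC("xdp")
    int xdp_redirect_hash(struct xdp_md *ctx)
    {
    	__u32 key = ctx->ingress_ifindex;

    	/* The lower two bits of flags pick the fallback verdict: if 'key'
    	 * is not in the map, pass the packet up the stack rather than
    	 * XDP_ABORTED, which a zero flags value selects.
    	 */
    	return bpf_redirect_map(&tx_ports, key, XDP_PASS);
    }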
+ * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2821,7 +2852,8 @@ union bpf_attr { FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3504,6 +3536,10 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@ -3525,6 +3561,8 @@ struct bpf_flow_keys { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; }; struct bpf_func_info { diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d821107f55f9..1aa189a9112a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -20,13 +20,6 @@ #define BTF_MAX_NR_TYPES 0x7fffffff #define BTF_MAX_STR_OFFSET 0x7fffffff -#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ - ((k) == BTF_KIND_VOLATILE) || \ - ((k) == BTF_KIND_CONST) || \ - ((k) == BTF_KIND_RESTRICT)) - -#define IS_VAR(k) ((k) == BTF_KIND_VAR) - static struct btf_type btf_void; struct btf { @@ -43,47 +36,6 @@ struct btf { int fd; }; -struct btf_ext_info { - /* - * info points to the individual info section (e.g. func_info and - * line_info) from the .BTF.ext. It does not include the __u32 rec_size. 
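Because the helper packs two quantities into one __s64, callers need a little bit surgery. A sketch of the consumer side — the surrounding program is assumed to have parsed and bounds-checked sk/iph/th already, and the function name is illustrative:

    #include <linux/bpf.h>
    #include <linux/tcp.h>
    #include "bpf_helpers.h"

    static __always_inline int issue_cookie(struct bpf_sock *sk, void *iph,
    					__u32 iph_len, struct tcphdr *th,
    					__u32 th_len, __u32 *cookie,
    					__u16 *mss)
    {
    	__s64 ret = bpf_tcp_gen_syncookie(sk, iph, iph_len, th, th_len);

    	if (ret < 0)
    		return ret;		/* one of the errors listed above */

    	*cookie = (__u32)ret;		/* bits  0..31: the SYN cookie    */
    	*mss	= (__u16)(ret >> 32);	/* bits 32..47: MSS baked into it */
    	return 0;
    }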
- */ - void *info; - __u32 rec_size; - __u32 len; -}; - -struct btf_ext { - union { - struct btf_ext_header *hdr; - void *data; - }; - struct btf_ext_info func_info; - struct btf_ext_info line_info; - __u32 data_size; -}; - -struct btf_ext_info_sec { - __u32 sec_name_off; - __u32 num_info; - /* Followed by num_info * record_size number of bytes */ - __u8 data[0]; -}; - -/* The minimum bpf_func_info checked by the loader */ -struct bpf_func_info_min { - __u32 insn_off; - __u32 type_id; -}; - -/* The minimum bpf_line_info checked by the loader */ -struct bpf_line_info_min { - __u32 insn_off; - __u32 file_name_off; - __u32 line_off; - __u32 line_col; -}; - static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -193,9 +145,9 @@ static int btf_parse_str_sec(struct btf *btf) static int btf_type_size(struct btf_type *t) { int base_size = sizeof(struct btf_type); - __u16 vlen = BTF_INFO_VLEN(t->info); + __u16 vlen = btf_vlen(t); - switch (BTF_INFO_KIND(t->info)) { + switch (btf_kind(t)) { case BTF_KIND_FWD: case BTF_KIND_CONST: case BTF_KIND_VOLATILE: @@ -220,7 +172,7 @@ static int btf_type_size(struct btf_type *t) case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); default: - pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); + pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; } } @@ -264,7 +216,7 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) static bool btf_type_is_void(const struct btf_type *t) { - return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD; + return t == &btf_void || btf_is_fwd(t); } static bool btf_type_is_void_or_null(const struct btf_type *t) @@ -285,7 +237,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { - switch (BTF_INFO_KIND(t->info)) { + switch (btf_kind(t)) { case BTF_KIND_INT: case BTF_KIND_STRUCT: case BTF_KIND_UNION: @@ -304,7 +256,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) type_id = t->type; break; case BTF_KIND_ARRAY: - array = (const struct btf_array *)(t + 1); + array = btf_array(t); if (nelems && array->nelems > UINT32_MAX / nelems) return -E2BIG; nelems *= array->nelems; @@ -335,8 +287,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); while (depth < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t) && - (IS_MODIFIER(BTF_INFO_KIND(t->info)) || - IS_VAR(BTF_INFO_KIND(t->info)))) { + (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) { type_id = t->type; t = btf__type_by_id(btf, type_id); depth++; @@ -555,11 +506,11 @@ static int compare_vsi_off(const void *_a, const void *_b) static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, struct btf_type *t) { - __u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info); + __u32 size = 0, off = 0, i, vars = btf_vlen(t); const char *name = btf__name_by_offset(btf, t->name_off); const struct btf_type *t_var; struct btf_var_secinfo *vsi; - struct btf_var *var; + const struct btf_var *var; int ret; if (!name) { @@ -575,12 +526,11 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, t->size = size; - for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1); - i < vars; i++, vsi++) { + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); - var = (struct btf_var *)(t_var + 1); + var = btf_var(t_var); - if 
(BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) { + if (!btf_is_var(t_var)) { pr_debug("Non-VAR type seen in section %s\n", name); return -EINVAL; } @@ -596,7 +546,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, ret = bpf_object__variable_offset(obj, name, &off); if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", name); + pr_debug("No offset found in symbol table for VAR %s\n", + name); return -ENOENT; } @@ -620,7 +571,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) * is section size and global variable offset. We use * the info from the ELF itself for this purpose. */ - if (BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC) { + if (btf_is_datasec(t)) { err = btf_fixup_datasec(obj, btf, t); if (err) break; @@ -775,14 +726,13 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, return -EINVAL; } - if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || - BTF_INFO_VLEN(container_type->info) < 2) { + if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { pr_warning("map:%s container_name:%s is an invalid container struct\n", map_name, container_name); return -EINVAL; } - key = (struct btf_member *)(container_type + 1); + key = btf_members(container_type); value = key + 1; key_size = btf__resolve_size(btf, key->type); @@ -832,6 +782,9 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, /* The start of the info sec (including the __u32 record_size). */ void *info; + if (ext_sec->len == 0) + return 0; + if (ext_sec->off & 0x03) { pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", ext_sec->desc); @@ -935,11 +888,24 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext) return btf_ext_setup_info(btf_ext, ¶m); } +static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext) +{ + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->offset_reloc_off, + .len = btf_ext->hdr->offset_reloc_len, + .min_rec_size = sizeof(struct bpf_offset_reloc), + .ext_info = &btf_ext->offset_reloc_info, + .desc = "offset_reloc", + }; + + return btf_ext_setup_info(btf_ext, ¶m); +} + static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - if (data_size < offsetof(struct btf_ext_header, func_info_off) || + if (data_size < offsetofend(struct btf_ext_header, hdr_len) || data_size < hdr->hdr_len) { pr_debug("BTF.ext header not found"); return -EINVAL; @@ -997,6 +963,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); + if (btf_ext->hdr->hdr_len < + offsetofend(struct btf_ext_header, line_info_len)) + goto done; err = btf_ext_setup_func_info(btf_ext); if (err) goto done; @@ -1005,6 +974,13 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) if (err) goto done; + if (btf_ext->hdr->hdr_len < + offsetofend(struct btf_ext_header, offset_reloc_len)) + goto done; + err = btf_ext_setup_offset_reloc(btf_ext); + if (err) + goto done; + done: if (err) { btf_ext__free(btf_ext); @@ -1441,10 +1417,9 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, d->map[0] = 0; for (i = 1; i <= btf->nr_types; i++) { struct btf_type *t = d->btf->types[i]; - __u16 kind = BTF_INFO_KIND(t->info); /* VAR and DATASEC are never deduped and are self-canonical */ - if (kind == BTF_KIND_VAR || kind == BTF_KIND_DATASEC) + if (btf_is_var(t) || btf_is_datasec(t)) d->map[i] = i; else d->map[i] = BTF_UNPROCESSED_ID; @@ -1485,11 +1460,11 @@ static int btf_for_each_str_off(struct btf_dedup 
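The offsetofend() checks above are what make each trailing header section optional: hdr_len records how much of the header the producer actually emitted, so consumers must gate every access to a newer field on it. A compressed sketch of the guard pattern — the helper name is hypothetical, and offsetofend() is assumed from tools' linux/kernel.h:

    /* A .BTF.ext produced by an older toolchain simply stops after
     * line_info_len; only trust optional fields the producer wrote.
     */
    static bool btf_ext_has_offset_relocs(const struct btf_ext_header *hdr)
    {
    	return hdr->hdr_len >= offsetofend(struct btf_ext_header,
    					   offset_reloc_len);
    }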
*d, str_off_fn_t fn, void *ctx) if (r) return r; - switch (BTF_INFO_KIND(t->info)) { + switch (btf_kind(t)) { case BTF_KIND_STRUCT: case BTF_KIND_UNION: { - struct btf_member *m = (struct btf_member *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); for (j = 0; j < vlen; j++) { r = fn(&m->name_off, ctx); @@ -1500,8 +1475,8 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) break; } case BTF_KIND_ENUM: { - struct btf_enum *m = (struct btf_enum *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + struct btf_enum *m = btf_enum(t); + __u16 vlen = btf_vlen(t); for (j = 0; j < vlen; j++) { r = fn(&m->name_off, ctx); @@ -1512,8 +1487,8 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) break; } case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = (struct btf_param *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + struct btf_param *m = btf_params(t); + __u16 vlen = btf_vlen(t); for (j = 0; j < vlen; j++) { r = fn(&m->name_off, ctx); @@ -1802,16 +1777,16 @@ static long btf_hash_enum(struct btf_type *t) /* Check structural equality of two ENUMs. */ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) { - struct btf_enum *m1, *m2; + const struct btf_enum *m1, *m2; __u16 vlen; int i; if (!btf_equal_common(t1, t2)) return false; - vlen = BTF_INFO_VLEN(t1->info); - m1 = (struct btf_enum *)(t1 + 1); - m2 = (struct btf_enum *)(t2 + 1); + vlen = btf_vlen(t1); + m1 = btf_enum(t1); + m2 = btf_enum(t2); for (i = 0; i < vlen; i++) { if (m1->name_off != m2->name_off || m1->val != m2->val) return false; @@ -1823,8 +1798,7 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) static inline bool btf_is_enum_fwd(struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM && - BTF_INFO_VLEN(t->info) == 0; + return btf_is_enum(t) && btf_vlen(t) == 0; } static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) @@ -1844,8 +1818,8 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) */ static long btf_hash_struct(struct btf_type *t) { - struct btf_member *member = (struct btf_member *)(t + 1); - __u32 vlen = BTF_INFO_VLEN(t->info); + const struct btf_member *member = btf_members(t); + __u32 vlen = btf_vlen(t); long h = btf_hash_common(t); int i; @@ -1865,16 +1839,16 @@ static long btf_hash_struct(struct btf_type *t) */ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) { - struct btf_member *m1, *m2; + const struct btf_member *m1, *m2; __u16 vlen; int i; if (!btf_equal_common(t1, t2)) return false; - vlen = BTF_INFO_VLEN(t1->info); - m1 = (struct btf_member *)(t1 + 1); - m2 = (struct btf_member *)(t2 + 1); + vlen = btf_vlen(t1); + m1 = btf_members(t1); + m2 = btf_members(t2); for (i = 0; i < vlen; i++) { if (m1->name_off != m2->name_off || m1->offset != m2->offset) return false; @@ -1891,7 +1865,7 @@ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) */ static long btf_hash_array(struct btf_type *t) { - struct btf_array *info = (struct btf_array *)(t + 1); + const struct btf_array *info = btf_array(t); long h = btf_hash_common(t); h = hash_combine(h, info->type); @@ -1909,13 +1883,13 @@ static long btf_hash_array(struct btf_type *t) */ static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) { - struct btf_array *info1, *info2; + const struct btf_array *info1, *info2; if (!btf_equal_common(t1, t2)) return false; - info1 = (struct btf_array *)(t1 
+ 1); - info2 = (struct btf_array *)(t2 + 1); + info1 = btf_array(t1); + info2 = btf_array(t2); return info1->type == info2->type && info1->index_type == info2->index_type && info1->nelems == info2->nelems; @@ -1928,14 +1902,10 @@ static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) */ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) { - struct btf_array *info1, *info2; - if (!btf_equal_common(t1, t2)) return false; - info1 = (struct btf_array *)(t1 + 1); - info2 = (struct btf_array *)(t2 + 1); - return info1->nelems == info2->nelems; + return btf_array(t1)->nelems == btf_array(t2)->nelems; } /* @@ -1945,8 +1915,8 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) */ static long btf_hash_fnproto(struct btf_type *t) { - struct btf_param *member = (struct btf_param *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + const struct btf_param *member = btf_params(t); + __u16 vlen = btf_vlen(t); long h = btf_hash_common(t); int i; @@ -1967,16 +1937,16 @@ static long btf_hash_fnproto(struct btf_type *t) */ static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) { - struct btf_param *m1, *m2; + const struct btf_param *m1, *m2; __u16 vlen; int i; if (!btf_equal_common(t1, t2)) return false; - vlen = BTF_INFO_VLEN(t1->info); - m1 = (struct btf_param *)(t1 + 1); - m2 = (struct btf_param *)(t2 + 1); + vlen = btf_vlen(t1); + m1 = btf_params(t1); + m2 = btf_params(t2); for (i = 0; i < vlen; i++) { if (m1->name_off != m2->name_off || m1->type != m2->type) return false; @@ -1993,7 +1963,7 @@ static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) */ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) { - struct btf_param *m1, *m2; + const struct btf_param *m1, *m2; __u16 vlen; int i; @@ -2001,9 +1971,9 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) if (t1->name_off != t2->name_off || t1->info != t2->info) return false; - vlen = BTF_INFO_VLEN(t1->info); - m1 = (struct btf_param *)(t1 + 1); - m2 = (struct btf_param *)(t2 + 1); + vlen = btf_vlen(t1); + m1 = btf_params(t1); + m2 = btf_params(t2); for (i = 0; i < vlen; i++) { if (m1->name_off != m2->name_off) return false; @@ -2029,7 +1999,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) __u32 cand_id; long h; - switch (BTF_INFO_KIND(t->info)) { + switch (btf_kind(t)) { case BTF_KIND_CONST: case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: @@ -2142,13 +2112,13 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) { __u32 orig_type_id = type_id; - if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + if (!btf_is_fwd(d->btf->types[type_id])) return type_id; while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) type_id = d->map[type_id]; - if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + if (!btf_is_fwd(d->btf->types[type_id])) return type_id; return orig_type_id; @@ -2157,7 +2127,7 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) static inline __u16 btf_fwd_kind(struct btf_type *t) { - return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT; + return btf_kflag(t) ? 
BTF_KIND_UNION : BTF_KIND_STRUCT; } /* @@ -2278,8 +2248,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, cand_type = d->btf->types[cand_id]; canon_type = d->btf->types[canon_id]; - cand_kind = BTF_INFO_KIND(cand_type->info); - canon_kind = BTF_INFO_KIND(canon_type->info); + cand_kind = btf_kind(cand_type); + canon_kind = btf_kind(canon_type); if (cand_type->name_off != canon_type->name_off) return 0; @@ -2328,12 +2298,12 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); case BTF_KIND_ARRAY: { - struct btf_array *cand_arr, *canon_arr; + const struct btf_array *cand_arr, *canon_arr; if (!btf_compat_array(cand_type, canon_type)) return 0; - cand_arr = (struct btf_array *)(cand_type + 1); - canon_arr = (struct btf_array *)(canon_type + 1); + cand_arr = btf_array(cand_type); + canon_arr = btf_array(canon_type); eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type); if (eq <= 0) @@ -2343,14 +2313,14 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_STRUCT: case BTF_KIND_UNION: { - struct btf_member *cand_m, *canon_m; + const struct btf_member *cand_m, *canon_m; __u16 vlen; if (!btf_shallow_equal_struct(cand_type, canon_type)) return 0; - vlen = BTF_INFO_VLEN(cand_type->info); - cand_m = (struct btf_member *)(cand_type + 1); - canon_m = (struct btf_member *)(canon_type + 1); + vlen = btf_vlen(cand_type); + cand_m = btf_members(cand_type); + canon_m = btf_members(canon_type); for (i = 0; i < vlen; i++) { eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); if (eq <= 0) @@ -2363,7 +2333,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, } case BTF_KIND_FUNC_PROTO: { - struct btf_param *cand_p, *canon_p; + const struct btf_param *cand_p, *canon_p; __u16 vlen; if (!btf_compat_fnproto(cand_type, canon_type)) @@ -2371,9 +2341,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); if (eq <= 0) return eq; - vlen = BTF_INFO_VLEN(cand_type->info); - cand_p = (struct btf_param *)(cand_type + 1); - canon_p = (struct btf_param *)(canon_type + 1); + vlen = btf_vlen(cand_type); + cand_p = btf_params(cand_type); + canon_p = btf_params(canon_type); for (i = 0; i < vlen; i++) { eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); if (eq <= 0) @@ -2428,8 +2398,8 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) targ_type_id = d->hypot_map[cand_type_id]; t_id = resolve_type_id(d, targ_type_id); c_id = resolve_type_id(d, cand_type_id); - t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info); - c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info); + t_kind = btf_kind(d->btf->types[t_id]); + c_kind = btf_kind(d->btf->types[c_id]); /* * Resolve FWD into STRUCT/UNION. 
* It's ok to resolve FWD into STRUCT/UNION that's not yet @@ -2498,7 +2468,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) return 0; t = d->btf->types[type_id]; - kind = BTF_INFO_KIND(t->info); + kind = btf_kind(t); if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) return 0; @@ -2593,7 +2563,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) t = d->btf->types[type_id]; d->map[type_id] = BTF_IN_PROGRESS_ID; - switch (BTF_INFO_KIND(t->info)) { + switch (btf_kind(t)) { case BTF_KIND_CONST: case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: @@ -2617,7 +2587,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) break; case BTF_KIND_ARRAY: { - struct btf_array *info = (struct btf_array *)(t + 1); + struct btf_array *info = btf_array(t); ref_type_id = btf_dedup_ref_type(d, info->type); if (ref_type_id < 0) @@ -2651,8 +2621,8 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) return ref_type_id; t->type = ref_type_id; - vlen = BTF_INFO_VLEN(t->info); - param = (struct btf_param *)(t + 1); + vlen = btf_vlen(t); + param = btf_params(t); for (i = 0; i < vlen; i++) { ref_type_id = btf_dedup_ref_type(d, param->type); if (ref_type_id < 0) @@ -2792,7 +2762,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) struct btf_type *t = d->btf->types[type_id]; int i, r; - switch (BTF_INFO_KIND(t->info)) { + switch (btf_kind(t)) { case BTF_KIND_INT: case BTF_KIND_ENUM: break; @@ -2812,7 +2782,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) break; case BTF_KIND_ARRAY: { - struct btf_array *arr_info = (struct btf_array *)(t + 1); + struct btf_array *arr_info = btf_array(t); r = btf_dedup_remap_type_id(d, arr_info->type); if (r < 0) @@ -2827,8 +2797,8 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: { - struct btf_member *member = (struct btf_member *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + struct btf_member *member = btf_members(t); + __u16 vlen = btf_vlen(t); for (i = 0; i < vlen; i++) { r = btf_dedup_remap_type_id(d, member->type); @@ -2841,8 +2811,8 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) } case BTF_KIND_FUNC_PROTO: { - struct btf_param *param = (struct btf_param *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + struct btf_param *param = btf_params(t); + __u16 vlen = btf_vlen(t); r = btf_dedup_remap_type_id(d, t->type); if (r < 0) @@ -2860,8 +2830,8 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) } case BTF_KIND_DATASEC: { - struct btf_var_secinfo *var = (struct btf_var_secinfo *)(t + 1); - __u16 vlen = BTF_INFO_VLEN(t->info); + struct btf_var_secinfo *var = btf_var_secinfos(t); + __u16 vlen = btf_vlen(t); for (i = 0; i < vlen; i++) { r = btf_dedup_remap_type_id(d, var->type); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 88a52ae56fc6..9cb44b4fbf60 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -5,6 +5,7 @@ #define __LIBBPF_BTF_H #include <stdarg.h> +#include <linux/btf.h> #include <linux/types.h> #ifdef __cplusplus @@ -57,6 +58,10 @@ struct btf_ext_header { __u32 func_info_len; __u32 line_info_off; __u32 line_info_len; + + /* optional part of .BTF.ext header */ + __u32 offset_reloc_off; + __u32 offset_reloc_len; }; LIBBPF_API void btf__free(struct btf *btf); @@ -120,6 +125,183 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); +/* + * A set of helpers for 
easier BTF types handling + */ +static inline __u16 btf_kind(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info); +} + +static inline __u16 btf_vlen(const struct btf_type *t) +{ + return BTF_INFO_VLEN(t->info); +} + +static inline bool btf_kflag(const struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info); +} + +static inline bool btf_is_int(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_INT; +} + +static inline bool btf_is_ptr(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_PTR; +} + +static inline bool btf_is_array(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ARRAY; +} + +static inline bool btf_is_struct(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_STRUCT; +} + +static inline bool btf_is_union(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_UNION; +} + +static inline bool btf_is_composite(const struct btf_type *t) +{ + __u16 kind = btf_kind(t); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + +static inline bool btf_is_enum(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM; +} + +static inline bool btf_is_fwd(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_FWD; +} + +static inline bool btf_is_typedef(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_TYPEDEF; +} + +static inline bool btf_is_volatile(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_VOLATILE; +} + +static inline bool btf_is_const(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_CONST; +} + +static inline bool btf_is_restrict(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_RESTRICT; +} + +static inline bool btf_is_mod(const struct btf_type *t) +{ + __u16 kind = btf_kind(t); + + return kind == BTF_KIND_VOLATILE || + kind == BTF_KIND_CONST || + kind == BTF_KIND_RESTRICT; +} + +static inline bool btf_is_func(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_FUNC; +} + +static inline bool btf_is_func_proto(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_FUNC_PROTO; +} + +static inline bool btf_is_var(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_VAR; +} + +static inline bool btf_is_datasec(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_DATASEC; +} + +static inline __u8 btf_int_encoding(const struct btf_type *t) +{ + return BTF_INT_ENCODING(*(__u32 *)(t + 1)); +} + +static inline __u8 btf_int_offset(const struct btf_type *t) +{ + return BTF_INT_OFFSET(*(__u32 *)(t + 1)); +} + +static inline __u8 btf_int_bits(const struct btf_type *t) +{ + return BTF_INT_BITS(*(__u32 *)(t + 1)); +} + +static inline struct btf_array *btf_array(const struct btf_type *t) +{ + return (struct btf_array *)(t + 1); +} + +static inline struct btf_enum *btf_enum(const struct btf_type *t) +{ + return (struct btf_enum *)(t + 1); +} + +static inline struct btf_member *btf_members(const struct btf_type *t) +{ + return (struct btf_member *)(t + 1); +} + +/* Get bit offset of a member with specified index. */ +static inline __u32 btf_member_bit_offset(const struct btf_type *t, + __u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + bool kflag = btf_kflag(t); + + return kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; +} +/* + * Get bitfield size of a member, assuming t is BTF_KIND_STRUCT or + * BTF_KIND_UNION. If member is not a bitfield, zero is returned. 
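Taken together, these helpers replace the open-coded BTF_INFO_* macros and pointer-cast arithmetic seen throughout the rest of this series. A small consumer-side sketch, assuming a valid btf handle and a struct/union type id:

    #include <stdio.h>
    #include "btf.h"

    static void dump_members(const struct btf *btf, __u32 id)
    {
    	const struct btf_type *t = btf__type_by_id(btf, id);
    	const struct btf_member *m;
    	__u16 i, vlen;

    	if (!t || !btf_is_composite(t))
    		return;

    	m = btf_members(t);
    	vlen = btf_vlen(t);
    	for (i = 0; i < vlen; i++, m++)
    		printf("%s: bit_offset=%u bitfield_size=%u\n",
    		       btf__name_by_offset(btf, m->name_off),
    		       btf_member_bit_offset(t, i),
    		       btf_member_bitfield_size(t, i));
    }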
+ */ +static inline __u32 btf_member_bitfield_size(const struct btf_type *t, + __u32 member_idx) +{ + const struct btf_member *m = btf_members(t) + member_idx; + bool kflag = btf_kflag(t); + + return kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; +} + +static inline struct btf_param *btf_params(const struct btf_type *t) +{ + return (struct btf_param *)(t + 1); +} + +static inline struct btf_var *btf_var(const struct btf_type *t) +{ + return (struct btf_var *)(t + 1); +} + +static inline struct btf_var_secinfo * +btf_var_secinfos(const struct btf_type *t) +{ + return (struct btf_var_secinfo *)(t + 1); +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 7065bb5b2752..715967762312 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -100,21 +100,6 @@ static bool str_equal_fn(const void *a, const void *b, void *ctx) return strcmp(a, b) == 0; } -static __u16 btf_kind_of(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info); -} - -static __u16 btf_vlen_of(const struct btf_type *t) -{ - return BTF_INFO_VLEN(t->info); -} - -static bool btf_kflag_of(const struct btf_type *t) -{ - return BTF_INFO_KFLAG(t->info); -} - static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) { return btf__name_by_offset(d->btf, name_off); @@ -349,7 +334,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) */ struct btf_dump_type_aux_state *tstate = &d->type_states[id]; const struct btf_type *t; - __u16 kind, vlen; + __u16 vlen; int err, i; /* return true, letting typedefs know that it's ok to be emitted */ @@ -357,18 +342,16 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return 1; t = btf__type_by_id(d->btf, id); - kind = btf_kind_of(t); if (tstate->order_state == ORDERING) { /* type loop, but resolvable through fwd declaration */ - if ((kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) && - through_ptr && t->name_off != 0) + if (btf_is_composite(t) && through_ptr && t->name_off != 0) return 0; pr_warning("unsatisfiable type cycle, id:[%u]\n", id); return -ELOOP; } - switch (kind) { + switch (btf_kind(t)) { case BTF_KIND_INT: tstate->order_state = ORDERED; return 0; @@ -378,14 +361,12 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) tstate->order_state = ORDERED; return err; - case BTF_KIND_ARRAY: { - const struct btf_array *a = (void *)(t + 1); + case BTF_KIND_ARRAY: + return btf_dump_order_type(d, btf_array(t)->type, through_ptr); - return btf_dump_order_type(d, a->type, through_ptr); - } case BTF_KIND_STRUCT: case BTF_KIND_UNION: { - const struct btf_member *m = (void *)(t + 1); + const struct btf_member *m = btf_members(t); /* * struct/union is part of strong link, only if it's embedded * (so no ptr in a path) or it's anonymous (so has to be @@ -396,7 +377,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) tstate->order_state = ORDERING; - vlen = btf_vlen_of(t); + vlen = btf_vlen(t); for (i = 0; i < vlen; i++, m++) { err = btf_dump_order_type(d, m->type, false); if (err < 0) @@ -447,7 +428,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return btf_dump_order_type(d, t->type, through_ptr); case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = (void *)(t + 1); + const struct btf_param *p = btf_params(t); bool is_strong; err = btf_dump_order_type(d, t->type, through_ptr); @@ -455,7 +436,7 @@ static int btf_dump_order_type(struct 
btf_dump *d, __u32 id, bool through_ptr) return err; is_strong = err > 0; - vlen = btf_vlen_of(t); + vlen = btf_vlen(t); for (i = 0; i < vlen; i++, p++) { err = btf_dump_order_type(d, p->type, through_ptr); if (err < 0) @@ -553,7 +534,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) return; t = btf__type_by_id(d->btf, id); - kind = btf_kind_of(t); + kind = btf_kind(t); if (top_level_def && t->name_off == 0) { pr_warning("unexpected nameless definition, id:[%u]\n", id); @@ -618,12 +599,9 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) case BTF_KIND_RESTRICT: btf_dump_emit_type(d, t->type, cont_id); break; - case BTF_KIND_ARRAY: { - const struct btf_array *a = (void *)(t + 1); - - btf_dump_emit_type(d, a->type, cont_id); + case BTF_KIND_ARRAY: + btf_dump_emit_type(d, btf_array(t)->type, cont_id); break; - } case BTF_KIND_FWD: btf_dump_emit_fwd_def(d, id, t); btf_dump_printf(d, ";\n\n"); @@ -656,8 +634,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) * applicable */ if (top_level_def || t->name_off == 0) { - const struct btf_member *m = (void *)(t + 1); - __u16 vlen = btf_vlen_of(t); + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); int i, new_cont_id; new_cont_id = t->name_off == 0 ? cont_id : id; @@ -678,8 +656,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) } break; case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = (void *)(t + 1); - __u16 vlen = btf_vlen_of(t); + const struct btf_param *p = btf_params(t); + __u16 vlen = btf_vlen(t); int i; btf_dump_emit_type(d, t->type, cont_id); @@ -696,7 +674,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) static int btf_align_of(const struct btf *btf, __u32 id) { const struct btf_type *t = btf__type_by_id(btf, id); - __u16 kind = btf_kind_of(t); + __u16 kind = btf_kind(t); switch (kind) { case BTF_KIND_INT: @@ -709,15 +687,12 @@ static int btf_align_of(const struct btf *btf, __u32 id) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: return btf_align_of(btf, t->type); - case BTF_KIND_ARRAY: { - const struct btf_array *a = (void *)(t + 1); - - return btf_align_of(btf, a->type); - } + case BTF_KIND_ARRAY: + return btf_align_of(btf, btf_array(t)->type); case BTF_KIND_STRUCT: case BTF_KIND_UNION: { - const struct btf_member *m = (void *)(t + 1); - __u16 vlen = btf_vlen_of(t); + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); int i, align = 1; for (i = 0; i < vlen; i++, m++) @@ -726,7 +701,7 @@ static int btf_align_of(const struct btf *btf, __u32 id) return align; } default: - pr_warning("unsupported BTF_KIND:%u\n", btf_kind_of(t)); + pr_warning("unsupported BTF_KIND:%u\n", btf_kind(t)); return 1; } } @@ -737,20 +712,18 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, const struct btf_member *m; int align, i, bit_sz; __u16 vlen; - bool kflag; align = btf_align_of(btf, id); /* size of a non-packed struct has to be a multiple of its alignment*/ if (t->size % align) return true; - m = (void *)(t + 1); - kflag = btf_kflag_of(t); - vlen = btf_vlen_of(t); + m = btf_members(t); + vlen = btf_vlen(t); /* all non-bitfield fields have to be naturally aligned */ for (i = 0; i < vlen; i++, m++) { align = btf_align_of(btf, m->type); - bit_sz = kflag ? 
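The packed-struct heuristic above rests on two facts about natural alignment; a concrete pair of cases, assuming a 4-byte int ABI:

    #include <linux/types.h>

    /* sizeof() a non-packed struct is a multiple of its alignment, and
     * every non-bitfield member sits at a naturally aligned offset.
     */
    struct not_packed {		/* align 4, size 8		     */
    	__u8  flag;		/* offset 0, 3 bytes of padding after */
    	__u32 data;		/* offset 4: naturally aligned	     */
    };

    struct is_packed {		/* members say align 4, but size is 5 */
    	__u8  flag;
    	__u32 data;		/* offset 1: misaligned		     */
    } __attribute__((packed));	/* both tests above flag this one     */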
BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; + bit_sz = btf_member_bitfield_size(t, i); if (bit_sz == 0 && m->offset % (8 * align) != 0) return true; } @@ -807,7 +780,7 @@ static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, const struct btf_type *t) { btf_dump_printf(d, "%s %s", - btf_kind_of(t) == BTF_KIND_STRUCT ? "struct" : "union", + btf_is_struct(t) ? "struct" : "union", btf_dump_type_name(d, id)); } @@ -816,12 +789,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, const struct btf_type *t, int lvl) { - const struct btf_member *m = (void *)(t + 1); - bool kflag = btf_kflag_of(t), is_struct; + const struct btf_member *m = btf_members(t); + bool is_struct = btf_is_struct(t); int align, i, packed, off = 0; - __u16 vlen = btf_vlen_of(t); + __u16 vlen = btf_vlen(t); - is_struct = btf_kind_of(t) == BTF_KIND_STRUCT; packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; align = packed ? 1 : btf_align_of(d->btf, id); @@ -835,8 +807,8 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, int m_off, m_sz; fname = btf_name_of(d, m->name_off); - m_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; - m_off = kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; + m_sz = btf_member_bitfield_size(t, i); + m_off = btf_member_bit_offset(t, i); align = packed ? 1 : btf_align_of(d->btf, m->type); btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); @@ -870,8 +842,8 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, const struct btf_type *t, int lvl) { - const struct btf_enum *v = (void *)(t+1); - __u16 vlen = btf_vlen_of(t); + const struct btf_enum *v = btf_enum(t); + __u16 vlen = btf_vlen(t); const char *name; size_t dup_cnt; int i; @@ -905,7 +877,7 @@ static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, { const char *name = btf_dump_type_name(d, id); - if (btf_kflag_of(t)) + if (btf_kflag(t)) btf_dump_printf(d, "union %s", name); else btf_dump_printf(d, "struct %s", name); @@ -987,7 +959,6 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, struct id_stack decl_stack; const struct btf_type *t; int err, stack_start; - __u16 kind; stack_start = d->decl_stack_cnt; for (;;) { @@ -1008,8 +979,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, break; t = btf__type_by_id(d->btf, id); - kind = btf_kind_of(t); - switch (kind) { + switch (btf_kind(t)) { case BTF_KIND_PTR: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: @@ -1017,12 +987,9 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, case BTF_KIND_FUNC_PROTO: id = t->type; break; - case BTF_KIND_ARRAY: { - const struct btf_array *a = (void *)(t + 1); - - id = a->type; + case BTF_KIND_ARRAY: + id = btf_array(t)->type; break; - } case BTF_KIND_INT: case BTF_KIND_ENUM: case BTF_KIND_FWD: @@ -1032,7 +999,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, goto done; default: pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", - kind, id); + btf_kind(t), id); goto done; } } @@ -1070,7 +1037,7 @@ static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) id = decl_stack->ids[decl_stack->cnt - 1]; t = btf__type_by_id(d->btf, id); - switch (btf_kind_of(t)) { + switch (btf_kind(t)) { case BTF_KIND_VOLATILE: btf_dump_printf(d, "volatile "); break; @@ -1087,20 +1054,6 @@ static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) } } -static bool btf_is_mod_kind(const struct btf *btf, __u32 id) -{ - const struct btf_type *t = btf__type_by_id(btf, id); - - switch 
(btf_kind_of(t)) { - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - return true; - default: - return false; - } -} - static void btf_dump_emit_name(const struct btf_dump *d, const char *name, bool last_was_ptr) { @@ -1139,7 +1092,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, } t = btf__type_by_id(d->btf, id); - kind = btf_kind_of(t); + kind = btf_kind(t); switch (kind) { case BTF_KIND_INT: @@ -1185,7 +1138,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, btf_dump_printf(d, " restrict"); break; case BTF_KIND_ARRAY: { - const struct btf_array *a = (void *)(t + 1); + const struct btf_array *a = btf_array(t); const struct btf_type *next_t; __u32 next_id; bool multidim; @@ -1201,7 +1154,8 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, */ while (decls->cnt) { next_id = decls->ids[decls->cnt - 1]; - if (btf_is_mod_kind(d->btf, next_id)) + next_t = btf__type_by_id(d->btf, next_id); + if (btf_is_mod(next_t)) decls->cnt--; else break; @@ -1214,7 +1168,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, } next_t = btf__type_by_id(d->btf, next_id); - multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY; + multidim = btf_is_array(next_t); /* we need space if we have named non-pointer */ if (fname[0] && !last_was_ptr) btf_dump_printf(d, " "); @@ -1228,8 +1182,8 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, return; } case BTF_KIND_FUNC_PROTO: { - const struct btf_param *p = (void *)(t + 1); - __u16 vlen = btf_vlen_of(t); + const struct btf_param *p = btf_params(t); + __u16 vlen = btf_vlen(t); int i; btf_dump_emit_mods(d, decls); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2586b6cb8f34..2233f919dd88 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -39,6 +39,7 @@ #include <sys/stat.h> #include <sys/types.h> #include <sys/vfs.h> +#include <sys/utsname.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> @@ -48,6 +49,7 @@ #include "btf.h" #include "str_error.h" #include "libbpf_internal.h" +#include "hashmap.h" #ifndef EM_BPF #define EM_BPF 247 @@ -75,9 +77,12 @@ static int __base_pr(enum libbpf_print_level level, const char *format, static libbpf_print_fn_t __libbpf_pr = __base_pr; -void libbpf_set_print(libbpf_print_fn_t fn) +libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) { + libbpf_print_fn_t old_print_fn = __libbpf_pr; + __libbpf_pr = fn; + return old_print_fn; } __printf(2, 3) @@ -1013,23 +1018,21 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return 0; } -static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, - __u32 id) +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { const struct btf_type *t = btf__type_by_id(btf, id); - while (true) { - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_TYPEDEF: - t = btf__type_by_id(btf, t->type); - break; - default: - return t; - } + if (res_id) + *res_id = id; + + while (btf_is_mod(t) || btf_is_typedef(t)) { + if (res_id) + *res_id = t->type; + t = btf__type_by_id(btf, t->type); } + + return t; } /* @@ -1042,14 +1045,14 @@ static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *def, const struct btf_member *m, __u32 *res) { - const struct btf_type *t = skip_mods_and_typedefs(btf, m->type); + const struct 
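Note that libbpf_set_print() now hands back the previously installed callback instead of returning void, which lets callers override logging for a bounded scope. A hedged usage sketch; quiet_print() and with_quiet_logs() are illustrative names:

    #include <stdarg.h>
    #include <stdio.h>
    #include "libbpf.h"

    static int quiet_print(enum libbpf_print_level level, const char *fmt,
    		       va_list args)
    {
    	if (level == LIBBPF_DEBUG)
    		return 0;		/* drop debug chatter */
    	return vfprintf(stderr, fmt, args);
    }

    static void with_quiet_logs(void (*fn)(void))
    {
    	libbpf_print_fn_t old = libbpf_set_print(quiet_print);

    	fn();
    	libbpf_set_print(old);		/* restore caller's printer */
    }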
btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); const char *name = btf__name_by_offset(btf, m->name_off); const struct btf_array *arr_info; const struct btf_type *arr_t; - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, BTF_INFO_KIND(t->info)); + map_name, name, btf_kind(t)); return false; } @@ -1059,12 +1062,12 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, map_name, name, t->type); return false; } - if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) { + if (!btf_is_array(arr_t)) { pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, BTF_INFO_KIND(arr_t->info)); + map_name, name, btf_kind(arr_t)); return false; } - arr_info = (const void *)(arr_t + 1); + arr_info = btf_array(arr_t); *res = arr_info->nelems; return true; } @@ -1082,11 +1085,11 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, struct bpf_map *map; int vlen, i; - vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx; + vi = btf_var_secinfos(sec) + var_idx; var = btf__type_by_id(obj->btf, vi->type); - var_extra = (const void *)(var + 1); + var_extra = btf_var(var); map_name = btf__name_by_offset(obj->btf, var->name_off); - vlen = BTF_INFO_VLEN(var->info); + vlen = btf_vlen(var); if (map_name == NULL || map_name[0] == '\0') { pr_warning("map #%d: empty name.\n", var_idx); @@ -1096,9 +1099,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_warning("map '%s' BTF data is corrupted.\n", map_name); return -EINVAL; } - if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) { + if (!btf_is_var(var)) { pr_warning("map '%s': unexpected var kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@ -1108,10 +1111,10 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return -EOPNOTSUPP; } - def = skip_mods_and_typedefs(obj->btf, var->type); - if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) { + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (!btf_is_struct(def)) { pr_warning("map '%s': unexpected def kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (def->size > vi->size) { @@ -1134,8 +1137,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - vlen = BTF_INFO_VLEN(def->info); - m = (const void *)(def + 1); + vlen = btf_vlen(def); + m = btf_members(def); for (i = 0; i < vlen; i++, m++) { const char *name = btf__name_by_offset(obj->btf, m->name_off); @@ -1185,9 +1188,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': key spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@ -1228,9 +1231,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': value spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@ -1291,7 +1294,7 @@ static int 
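For orientation, this parser consumes BTF-defined map declarations of the shape below. The __uint/__type encoding macros follow the kernel selftests' convention and are reproduced here as an assumption, not part of this patch:

    #include <linux/types.h>
    #include <linux/bpf.h>

    #define SEC(name) __attribute__((section(name), used))

    /* Integer attributes travel as the nelems of a pointed-to array (read
     * back through btf_is_ptr/btf_is_array/btf_array above); key/value
     * types travel as pointee types, whose sizes btf__resolve_size()
     * recovers.
     */
    #define __uint(name, val) int (*name)[val]
    #define __type(name, val) typeof(val) *name

    struct pair { __u64 packets; __u64 bytes; };

    struct {
    	__uint(type, BPF_MAP_TYPE_HASH);
    	__uint(max_entries, 1024);
    	__type(key, __u32);
    	__type(value, struct pair);
    } stats_map SEC(".maps");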
bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) nr_types = btf__get_nr_types(obj->btf); for (i = 1; i <= nr_types; i++) { t = btf__type_by_id(obj->btf, i); - if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) + if (!btf_is_datasec(t)) continue; name = btf__name_by_offset(obj->btf, t->name_off); if (strcmp(name, MAPS_ELF_SEC) == 0) { @@ -1305,7 +1308,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) return -ENOENT; } - vlen = BTF_INFO_VLEN(sec->info); + vlen = btf_vlen(sec); for (i = 0; i < vlen; i++) { err = bpf_object__init_user_btf_map(obj, sec, i, obj->efile.btf_maps_shndx, @@ -1366,16 +1369,14 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - __u16 kind; if (!obj->btf || (has_func && has_datasec)) return; for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - kind = BTF_INFO_KIND(t->info); - if (!has_datasec && kind == BTF_KIND_VAR) { + if (!has_datasec && btf_is_var(t)) { /* replace VAR with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* @@ -1384,11 +1385,11 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) * original variable took less than 4 bytes */ t->size = 1; - *(int *)(t+1) = BTF_INT_ENC(0, 0, 8); - } else if (!has_datasec && kind == BTF_KIND_DATASEC) { + *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); + } else if (!has_datasec && btf_is_datasec(t)) { /* replace DATASEC with STRUCT */ - struct btf_var_secinfo *v = (void *)(t + 1); - struct btf_member *m = (void *)(t + 1); + const struct btf_var_secinfo *v = btf_var_secinfos(t); + struct btf_member *m = btf_members(t); struct btf_type *vt; char *name; @@ -1399,7 +1400,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) name++; } - vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); for (j = 0; j < vlen; j++, v++, m++) { /* order of field assignments is important */ @@ -1409,12 +1410,12 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } - } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) { + } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ - vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); t->size = sizeof(__u32); /* kernel enforced */ - } else if (!has_func && kind == BTF_KIND_FUNC) { + } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); } @@ -1772,15 +1773,22 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, (long long) sym.st_value, sym.st_name, name); shdr_idx = sym.st_shndx; + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n", + insn_idx, shdr_idx); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n", + name, shdr_idx, insn_idx, + insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (insns[insn_idx].code == (BPF_JMP | 
BPF_CALL)) { if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { pr_warning("incorrect bpf_call opcode\n"); @@ -2294,6 +2302,894 @@ bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, return 0; } +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field byte offset represented by spec */ + __u32 offset; +}; + +static bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +/* + * Turn bpf_offset_reloc into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating the resulting + * field offset (in bytes) specified by the accessor string. The low-level spec + * captures every single level of nestedness, including traversing anonymous + * struct/union members. The high-level one only captures semantically meaningful + * "turning points": named fields and array indices. + * E.g., for this case: + * + * struct sample { + * int __unimportant; + * struct { + * int __1; + * int __2; + * int a[7]; + * }; + * }; + * + * struct sample *s = ...; + * + * int x = &s->a[3]; // access string = '0:1:2:3' + * + * The low-level spec has a 1:1 mapping with each element of the access string + * (it's just a parsed access string representation): [0, 1, 2, 3]. + * + * The high-level spec will capture only 3 points: + * - initial zero-index access by pointer (&s->... is the same as &s[0]...); + * - field 'a' access (corresponds to '2' in low-level spec); + * - array element #3 access (corresponds to '3' in low-level spec).
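+ * + * As a worked illustration (assuming 4-byte ints and no padding), the code + * below resolves this spec to spec->offset == 24: 0 * sizeof(struct sample) + * for the initial pointer access, plus 4 bytes to reach the anonymous struct + * (member #1), plus 8 bytes to reach 'a' (member #2, after __1 and __2), + * plus 3 * sizeof(int) for the array element access.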
+ * + */ +static int bpf_core_spec_parse(const struct btf *btf, + __u32 type_id, + const char *spec_str, + struct bpf_core_spec *spec) +{ + int access_idx, parsed_len, i; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + /* first spec value is always reloc type array index */ + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + spec->spec[0].type_id = id; + spec->spec[0].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset = access_idx * sz; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[i]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + if (btf_member_bitfield_size(t, access_idx)) + return -EINVAL; + + offset = btf_member_bit_offset(t, access_idx); + if (offset % 8) + return -EINVAL; + spec->offset += offset / 8; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->spec[spec->len].name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t || access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset += access_idx * sz; + } else { + pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); + return -EINVAL; + } + } + + return 0; +} + +static bool bpf_core_is_flavor_sep(const char *s) +{ + /* check X___Y name pattern, where X and Y are not underscores */ + return s[0] != '_' && /* X */ + s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ + s[4] != '_'; /* Y */ +} + +/* Given 'some_struct_name___with_flavor' return the length of a name prefix + * before last triple underscore. Struct name part after last triple + * underscore is ignored by BPF CO-RE relocation during relocation matching. 
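+ * + * For example, bpf_core_essential_name_len("sample___flavor") returns 6, + * matching just "sample"; for a name with no flavor suffix, like "sample", + * the full string length is returned.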
+ */ +static size_t bpf_core_essential_name_len(const char *name) +{ + size_t n = strlen(name); + int i; + + for (i = n - 5; i >= 0; i--) { + if (bpf_core_is_flavor_sep(name + i)) + return i + 1; + } + return n; +} + +/* dynamically sized list of type IDs */ +struct ids_vec { + __u32 *data; + int len; +}; + +static void bpf_core_free_cands(struct ids_vec *cand_ids) +{ + free(cand_ids->data); + free(cand_ids); +} + +static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, + __u32 local_type_id, + const struct btf *targ_btf) +{ + size_t local_essent_len, targ_essent_len; + const char *local_name, *targ_name; + const struct btf_type *t; + struct ids_vec *cand_ids; + __u32 *new_ids; + int i, err, n; + + t = btf__type_by_id(local_btf, local_type_id); + if (!t) + return ERR_PTR(-EINVAL); + + local_name = btf__name_by_offset(local_btf, t->name_off); + if (str_is_empty(local_name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(local_name); + + cand_ids = calloc(1, sizeof(*cand_ids)); + if (!cand_ids) + return ERR_PTR(-ENOMEM); + + n = btf__get_nr_types(targ_btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(targ_btf, i); + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) + continue; + + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + + if (strncmp(local_name, targ_name, local_essent_len) == 0) { + pr_debug("[%d] %s: found candidate [%d] %s\n", + local_type_id, local_name, i, targ_name); + /* grow by whole __u32 elements, not bytes */ + new_ids = realloc(cand_ids->data, + sizeof(*new_ids) * (cand_ids->len + 1)); + if (!new_ids) { + err = -ENOMEM; + goto err_out; + } + cand_ids->data = new_ids; + cand_ids->data[cand_ids->len++] = i; + } + } + return cand_ids; +err_out: + bpf_core_free_cands(cand_ids); + return ERR_PTR(err); +} + +/* Check two types for compatibility, skipping const/volatile/restrict and + * typedefs, to ensure we are relocating an offset between compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible; + * - any two PTRs are always compatible; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and bitness should match, signedness is ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - nothing else should ever be a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations.
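+ * + * For instance, under these rules 'unsigned int' and 'int' are compatible + * (signedness is ignored), while 'int' and 'long' are not on LP64 targets + * (their sizes differ); two differently named enums are compatible as long + * as both have the same size.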
+ */ +static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + +recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_FWD: + case BTF_KIND_PTR: + return 1; + case BTF_KIND_ENUM: + return local_type->size == targ_type->size; + case BTF_KIND_INT: + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0 && + local_type->size == targ_type->size && + btf_int_bits(local_type) == btf_int_bits(targ_type); + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } +} + +/* + * Given a single high-level named field accessor in the local type, find the + * corresponding high-level accessor for a target type. Along the way, + * maintain the low-level spec for the target as well. Also keep updating the + * target offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If a field with the + * desired name is found, compatibility between local and target types is + * checked before returning the result. + * + * 1 is returned if the field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error.
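+ * + * E.g., if the local accessor names field 'a' and the target struct holds + * 'a' inside an anonymous union, the search descends into that union: the + * anonymous member's index and then the index of 'a' are both appended to + * the target's low-level spec, while the high-level spec gains just one + * named accessor for 'a'.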
+ */ +static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) +{ + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 offset; + + /* bitfield relocations not supported */ + if (btf_member_bitfield_size(targ_type, i)) + continue; + offset = btf_member_bit_offset(targ_type, i); + if (offset % 8) + continue; + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->offset += offset / 8; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->offset -= offset / 8; + spec->raw_len--; + } + + return 0; +} + +/* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + offset). 
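+ * + * E.g., a local spec for s->a[3] matches a target as long as the target + * type has a compatible field 'a' (at whatever member index and offset) + * whose array type has more than 3 elements; the produced target spec then + * records the target's own member indices and byte offset.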
+ */ +static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) +{ + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + if (local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->offset += local_acc->idx * sz; + } + } + + return 1; +} + +/* + * Patch relocatable BPF instruction. + * The expected insn->imm value is provided for validation, along with the new + * relocated value. + * + * Currently two kinds of BPF instructions are patched, both with an immediate + * operand: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand). + * Indirect memory assignment with an immediate operand, *(rX) = <imm>, is not + * recognized by the code below and is rejected with a warning. + * + * If the actual insn->imm value is wrong, bail out.
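+ * + * For example (offsets here are illustrative, not from a real object file), + * a load compiled from __builtin_preserve_access_index() may contain + * 'r2 = 8' (BPF_ALU64 | BPF_MOV | BPF_K, imm=8), with 8 being the field + * offset in the local BTF; if the matching target field sits at offset 16, + * the code below validates that imm == 8 and rewrites it to 16.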
+ */ +static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, + __u32 orig_off, __u32 new_off) +{ + struct bpf_insn *insn; + int insn_idx; + __u8 class; + + if (insn_off % sizeof(struct bpf_insn)) + return -EINVAL; + insn_idx = insn_off / sizeof(struct bpf_insn); + + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (insn->imm != orig_off) + return -EINVAL; + insn->imm = new_off; + pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", + bpf_program__title(prog, false), + insn_idx, orig_off, new_off); + } else { + pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); + return -EINVAL; + } + return 0; +} + +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + +/* + * Probe a few well-known locations for the vmlinux kernel image and try to + * load its BTF data to use as the target BTF. + */ +static struct btf *bpf_core_find_kernel_btf(void) +{ + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warning("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); +} + +/* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +{ + const struct btf_type *t; + const char *s; + __u32 type_id; + int i; + + type_id = spec->spec[0].type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + libbpf_print(level, "[%u] %s + ", type_id, s); + + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%d%s", spec->raw_spec[i], + i == spec->raw_len - 1 ? " => " : ":"); + + libbpf_print(level, "%u @ &x", spec->offset); + + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + +} + +static size_t bpf_core_hash_fn(const void *key, void *ctx) +{ + return (size_t)key; +} + +static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} + +static void *u32_as_hash_key(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +/* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For the given local type, find corresponding candidate target types. + * A candidate type is a type with the same "essential" name, ignoring + * everything after the last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one` are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, for specifying/supporting + * incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one distinct type with the same name. In that case, ___2, ___3, etc + * are appended starting from the second name conflict. But struct flavors + * are also useful when defined "locally", in a BPF program, to extract the + * same data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on the target kernel version. + * 2. For each candidate type, try to match the local specification to this + * candidate target type. Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match + * and all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is no error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accommodate + * imperfection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such a situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through a pointer. This should be OK in most cases and + * is not an error. + * 4. The candidate type search is performed by linearly scanning through all + * types in the target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching the resolved + * list of matching candidates for each local "root" type ID that has at + * least one bpf_offset_reloc associated with it.
This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ +static int bpf_core_reloc_offset(struct bpf_program *prog, + const struct bpf_offset_reloc *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) +{ + const char *prog_name = bpf_program__title(prog, false); + struct bpf_core_spec local_spec, cand_spec, targ_spec; + const void *type_key = u32_as_hash_key(relo->type_id); + const struct btf_type *local_type, *cand_type; + const char *local_name, *cand_name; + struct ids_vec *cand_ids; + __u32 local_id, cand_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (str_is_empty(local_name)) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + if (err) { + pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { + cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); + if (IS_ERR(cand_ids)) { + pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); + return PTR_ERR(cand_ids); + } + err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); + if (err) { + bpf_core_free_cands(cand_ids); + return err; + } + } + + for (i = 0, j = 0; i < cand_ids->len; i++) { + cand_id = cand_ids->data[i]; + cand_type = btf__type_by_id(targ_btf, cand_id); + cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); + + err = bpf_core_spec_match(&local_spec, targ_btf, + cand_id, &cand_spec); + pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", + prog_name, relo_idx, i, cand_name); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + if (err < 0) { + pr_warning("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); + return err; + } + if (err == 0) + continue; + + if (j == 0) { + targ_spec = cand_spec; + } else if (cand_spec.offset != targ_spec.offset) { + /* if there are many candidates, they should all + * resolve to the same offset + */ + pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.offset, + targ_spec.offset); + return -EINVAL; + } + + cand_ids->data[j++] = cand_spec.spec[0].type_id; + } + + cand_ids->len = j; + if (cand_ids->len == 0) { + pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); + return -ESRCH; + } + + err = bpf_core_reloc_insn(prog, relo->insn_off, + local_spec.offset, targ_spec.offset); + if (err) { + pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); + return -EINVAL; + } + + return 0; +} + +static int +bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) +{ 
+ const struct btf_ext_info_sec *sec; + const struct bpf_offset_reloc *rec; + const struct btf_ext_info *seg; + struct hashmap_entry *entry; + struct hashmap *cand_cache = NULL; + struct bpf_program *prog; + struct btf *targ_btf; + const char *sec_name; + int i, err = 0; + + if (targ_btf_path) + targ_btf = btf__parse_elf(targ_btf_path, NULL); + else + targ_btf = bpf_core_find_kernel_btf(); + if (IS_ERR(targ_btf)) { + pr_warning("failed to get target BTF: %ld\n", + PTR_ERR(targ_btf)); + return PTR_ERR(targ_btf); + } + + cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); + if (IS_ERR(cand_cache)) { + err = PTR_ERR(cand_cache); + goto out; + } + + seg = &obj->btf_ext->offset_reloc_info; + for_each_btf_ext_sec(seg, sec) { + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) { + err = -EINVAL; + goto out; + } + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + pr_warning("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); + err = -EINVAL; + goto out; + } + + pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + sec_name, sec->num_info); + + for_each_btf_ext_rec(seg, sec, i, rec) { + err = bpf_core_reloc_offset(prog, rec, i, obj->btf, + targ_btf, cand_cache); + if (err) { + pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); + goto out; + } + } + } + +out: + btf__free(targ_btf); + if (!IS_ERR_OR_NULL(cand_cache)) { + hashmap__for_each_entry(cand_cache, entry, i) { + bpf_core_free_cands(entry->value); + } + hashmap__free(cand_cache); + } + return err; +} + +static int +bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +{ + int err = 0; + + if (obj->btf_ext->offset_reloc_info.len) + err = bpf_core_reloc_offsets(obj, targ_btf_path); + + return err; +} + static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) @@ -2401,14 +3297,21 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) return 0; } - static int -bpf_object__relocate(struct bpf_object *obj) +bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i; int err; + if (obj->btf_ext) { + err = bpf_object__relocate_core(obj, targ_btf_path); + if (err) { + pr_warning("failed to perform CO-RE relocations: %d\n", + err); + return err; + } + } for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; @@ -2809,7 +3712,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) obj->loaded = true; CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj), err, out); + CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); return 0; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5cbf459ece0b..e8f70977d137 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -57,7 +57,7 @@ enum libbpf_print_level { typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, const char *, va_list ap); -LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); +LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; @@ -92,6 +92,7 @@ LIBBPF_API void bpf_object__close(struct bpf_object *object); struct bpf_object_load_attr { struct bpf_object *obj; int log_level; + const char *target_btf_path; }; /* Load/unload object into/from kernel */ diff --git 
a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 2ac29bd36226..2e83a34f8c79 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -29,6 +29,10 @@ #ifndef max # define max(x, y) ((x) < (y) ? (y) : (x)) #endif +#ifndef offsetofend +# define offsetofend(TYPE, FIELD) \ + (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) +#endif extern void libbpf_print(enum libbpf_print_level level, const char *format, ...) @@ -46,4 +50,105 @@ do { \ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); +struct btf_ext_info { + /* + * info points to the individual info section (e.g. func_info and + * line_info) from the .BTF.ext. It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + +#define for_each_btf_ext_sec(seg, sec) \ + for (sec = (seg)->info; \ + (void *)sec < (seg)->info + (seg)->len; \ + sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \ + (seg)->rec_size * sec->num_info) + +#define for_each_btf_ext_rec(seg, sec, i, rec) \ + for (i = 0, rec = (void *)&(sec)->data; \ + i < (sec)->num_info; \ + i++, rec = (void *)rec + (seg)->rec_size) + +struct btf_ext { + union { + struct btf_ext_header *hdr; + void *data; + }; + struct btf_ext_info func_info; + struct btf_ext_info line_info; + struct btf_ext_info offset_reloc_info; + __u32 data_size; +}; + +struct btf_ext_info_sec { + __u32 sec_name_off; + __u32 num_info; + /* Followed by num_info * record_size bytes */ + __u8 data[0]; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_off; + __u32 type_id; +}; + +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +/* The minimum bpf_offset_reloc checked by the loader + * + * Offset relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with the actual offset; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * offset; + * - access_str_off - offset into the corresponding .BTF string section. The + * string itself encodes an accessed field using a sequence of field and array + * indices, separated by colons (:). It's conceptually very close to LLVM's + * getelementptr ([0]) instruction's arguments for identifying the offset to + * a field. + * + * An example to provide a better feel: + * + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int x = &s->a; // encoded as "0:0" (a is field #0) + * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int z = &s[10].b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such a relocation is emitted when using the __builtin_preserve_access_index() + * Clang built-in, passing an expression that captures a field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit an offset relocation recording the data + * necessary to find the offset of the embedded `a.b.c` field within the + * `src` struct.
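+ * + * A hypothetical record for the bpf_probe_read() example above would carry + * the byte offset of the patched instruction in insn_off, the BTF type ID + * of src's struct type in type_id, and an access_str_off pointing at a + * string such as "0:0:1:2" (one initial index plus one index per level of + * a.b.c); the actual values depend on the program and its BTF.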
+ * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_offset_reloc { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; +}; + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index ace1a0708d99..4b0b0364f5fc 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -244,6 +244,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_CPUMAP: case BPF_MAP_TYPE_XSKMAP: diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c085964e1d05..29001f944db7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -236,18 +236,12 @@ PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h PROG_TESTS_FILES := $(wildcard prog_tests/*.c) test_progs.c: $(PROG_TESTS_H) $(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) -$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES) +$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_FILES) | $(PROG_TESTS_H) $(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) $(shell ( cd prog_tests/; \ echo '/* Generated header, do not edit */'; \ - echo '#ifdef DECLARE'; \ ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \ - echo '#endif'; \ - echo '#ifdef CALL'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@test_\1();@'; \ - echo '#endif' \ + sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ ) > $(PROG_TESTS_H)) MAP_TESTS_DIR = $(OUTPUT)/map_tests @@ -257,7 +251,7 @@ MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h MAP_TESTS_FILES := $(wildcard map_tests/*.c) test_maps.c: $(MAP_TESTS_H) $(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) -$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES) +$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_FILES) | $(MAP_TESTS_H) $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) $(shell ( cd map_tests/; \ echo '/* Generated header, do not edit */'; \ @@ -278,7 +272,7 @@ VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h VERIFIER_TEST_FILES := $(wildcard verifier/*.c) test_verifier.c: $(VERIFIER_TESTS_H) $(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) -$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H) +$(OUTPUT)/test_verifier: test_verifier.c | $(VERIFIER_TEST_FILES) $(VERIFIER_TESTS_H) $(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) $(shell ( cd verifier/; \ echo '/* Generated header, do not edit */'; \ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index f804f210244e..8b503ea142f0 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -228,6 +228,9 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *)BPF_FUNC_sk_storage_delete; static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal; +static long long (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *ip, + int ip_len, void *tcp, int tcp_len) = + (void *) BPF_FUNC_tcp_gen_syncookie; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -501,4 +504,24 @@ struct pt_regs; (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif +/* + * BPF_CORE_READ abstracts away 
bpf_probe_read() call and captures offset + * relocation for source address using __builtin_preserve_access_index() + * built-in, provided by Clang. + * + * __builtin_preserve_access_index() takes as an argument an expression of + * taking an address of a field within struct/union. It makes compiler emit + * a relocation, which records BTF type ID describing root struct/union and an + * accessor string which describes exact embedded field that was used to take + * an address. See detailed description of this relocation format and + * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h. + * + * This relocation allows libbpf to adjust BPF instruction to use correct + * actual field offset, based on target kernel BTF type that matches original + * (local) BTF, used to record relocation. + */ +#define BPF_CORE_READ(dst, src) \ + bpf_probe_read((dst), sizeof(*(src)), \ + __builtin_preserve_access_index(src)) + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index cb827383db4d..fb5840a62548 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -106,8 +106,8 @@ void test_bpf_obj_id(void) if (CHECK(err || prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || - (jit_enabled && !prog_infos[i].jited_prog_len) || - (jit_enabled && + (env.jit_enabled && !prog_infos[i].jited_prog_len) || + (env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))) || !prog_infos[i].xlated_prog_len || !memcmp(xlated_insns, zeros, sizeof(zeros)) || @@ -121,7 +121,7 @@ void test_bpf_obj_id(void) err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), - jit_enabled, + env.jit_enabled, prog_infos[i].jited_prog_len, prog_infos[i].xlated_prog_len, !!memcmp(jited_insns, zeros, sizeof(zeros)), diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e1b55261526f..1a1eae356f81 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -4,12 +4,15 @@ static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { - if (level != LIBBPF_DEBUG) - return vfprintf(stderr, format, args); + if (level != LIBBPF_DEBUG) { + vprintf(format, args); + return 0; + } if (!strstr(format, "verifier log")) return 0; - return vfprintf(stderr, "%s", args); + vprintf("%s", args); + return 0; } static int check_load(const char *file, enum bpf_prog_type type) @@ -30,14 +33,25 @@ static int check_load(const char *file, enum bpf_prog_type type) return err; } +struct scale_test_def { + const char *file; + enum bpf_prog_type attach_type; + bool fails; +}; + void test_bpf_verif_scale(void) { - const char *sched_cls[] = { - "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o", - }; - const char *raw_tp[] = { + struct scale_test_def tests[] = { + { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ }, + + { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS }, + { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, + { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, + /* full unroll by llvm */ - "./pyperf50.o", "./pyperf100.o", "./pyperf180.o", + { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, 
/* partial unroll. llvm will unroll loop ~150 times. * C loop count -> 600. @@ -45,7 +59,7 @@ void test_bpf_verif_scale(void) * 16k insns in loop body. * Total of 5 such loops. Total program size ~82k insns. */ - "./pyperf600.o", + { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* no unroll at all. * C loop count -> 600. @@ -53,48 +67,52 @@ void test_bpf_verif_scale(void) * ~110 insns in loop body. * Total of 5 such loops. Total program size ~1500 insns. */ - "./pyperf600_nounroll.o", + { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - "./loop1.o", "./loop2.o", + { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, + { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, /* partial unroll. 19k insn in a loop. * Total program size 20.8k insn. * ~350k processed_insns */ - "./strobemeta.o", + { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* no unroll, tiny loops */ - "./strobemeta_nounroll1.o", - "./strobemeta_nounroll2.o", - }; - const char *cg_sysctl[] = { - "./test_sysctl_loop1.o", "./test_sysctl_loop2.o", - }; - int err, i; + { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - if (verifier_stats) - libbpf_set_print(libbpf_debug_print); + { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, + { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT); - printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL"); + { "test_xdp_loop.o", BPF_PROG_TYPE_XDP }, + { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL }, + }; + libbpf_print_fn_t old_print_fn = NULL; + int err, i; - for (i = 0; i < ARRAY_SIZE(sched_cls); i++) { - err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS); - printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK"); + if (env.verifier_stats) { + test__force_log(); + old_print_fn = libbpf_set_print(libbpf_debug_print); } - for (i = 0; i < ARRAY_SIZE(raw_tp); i++) { - err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT); - printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK"); - } + for (i = 0; i < ARRAY_SIZE(tests); i++) { + const struct scale_test_def *test = &tests[i]; + + if (!test__start_subtest(test->file)) + continue; - for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) { - err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL); - printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK"); + err = check_load(test->file, test->attach_type); + if (test->fails) { /* expected to fail */ + if (err) + error_cnt--; + else + error_cnt++; + } } - err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP); - printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK"); - err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL); - printf("test_scale:test_seg6_loop:%s\n", err ? 
"FAIL" : "OK"); + if (env.verifier_stats) + libbpf_set_print(old_print_fn); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c new file mode 100644 index 000000000000..f3863f976a48 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "progs/core_reloc_types.h" + +#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) + +#define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = 42, \ + .b = 0xc001, \ + .c = 0xbeef, \ +} + +#define FLAVORS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_flavors.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define FLAVORS_CASE(name) { \ + FLAVORS_CASE_COMMON(name), \ + .input = FLAVORS_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = FLAVORS_DATA(core_reloc_flavors), \ + .output_len = sizeof(struct core_reloc_flavors), \ +} + +#define FLAVORS_ERR_CASE(name) { \ + FLAVORS_CASE_COMMON(name), \ + .fails = true, \ +} + +#define NESTING_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = { .a = { .a = 42 } }, \ + .b = { .b = { .b = 0xc001 } }, \ +} + +#define NESTING_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_nesting.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define NESTING_CASE(name) { \ + NESTING_CASE_COMMON(name), \ + .input = NESTING_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = NESTING_DATA(core_reloc_nesting), \ + .output_len = sizeof(struct core_reloc_nesting) \ +} + +#define NESTING_ERR_CASE(name) { \ + NESTING_CASE_COMMON(name), \ + .fails = true, \ +} + +#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = { [2] = 1 }, \ + .b = { [1] = { [2] = { [3] = 2 } } }, \ + .c = { [1] = { .c = 3 } }, \ + .d = { [0] = { [0] = { .d = 4 } } }, \ +} + +#define ARRAYS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_arrays.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define ARRAYS_CASE(name) { \ + ARRAYS_CASE_COMMON(name), \ + .input = ARRAYS_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \ + .a2 = 1, \ + .b123 = 2, \ + .c1c = 3, \ + .d00d = 4, \ + }, \ + .output_len = sizeof(struct core_reloc_arrays_output) \ +} + +#define ARRAYS_ERR_CASE(name) { \ + ARRAYS_CASE_COMMON(name), \ + .fails = true, \ +} + +#define PRIMITIVES_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = 1, \ + .b = 2, \ + .c = 3, \ + .d = (void *)4, \ + .f = (void *)5, \ +} + +#define PRIMITIVES_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_primitives.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define PRIMITIVES_CASE(name) { \ + PRIMITIVES_CASE_COMMON(name), \ + .input = PRIMITIVES_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = PRIMITIVES_DATA(core_reloc_primitives), \ + .output_len = sizeof(struct core_reloc_primitives), \ +} + +#define PRIMITIVES_ERR_CASE(name) { \ + PRIMITIVES_CASE_COMMON(name), \ + .fails = true, \ +} + +#define MODS_CASE(name) { \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_mods.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) { \ + .a = 1, \ + .b = 2, \ + 
.c = (void *)3, \ + .d = (void *)4, \ + .e = { [2] = 5 }, \ + .f = { [1] = 6 }, \ + .g = { .x = 7 }, \ + .h = { .y = 8 }, \ + }, \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_mods_output) { \ + .a = 1, .b = 2, .c = 3, .d = 4, \ + .e = 5, .f = 6, .g = 7, .h = 8, \ + }, \ + .output_len = sizeof(struct core_reloc_mods_output), \ +} + +#define PTR_AS_ARR_CASE(name) { \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_ptr_as_arr.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .input = (const char *)&(struct core_reloc_##name []){ \ + { .a = 1 }, \ + { .a = 2 }, \ + { .a = 3 }, \ + }, \ + .input_len = 3 * sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_ptr_as_arr) { \ + .a = 3, \ + }, \ + .output_len = sizeof(struct core_reloc_ptr_as_arr), \ +} + +#define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .u8_field = 1, \ + .s8_field = 2, \ + .u16_field = 3, \ + .s16_field = 4, \ + .u32_field = 5, \ + .s32_field = 6, \ + .u64_field = 7, \ + .s64_field = 8, \ +} + +#define INTS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_ints.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define INTS_CASE(name) { \ + INTS_CASE_COMMON(name), \ + .input = INTS_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = INTS_DATA(core_reloc_ints), \ + .output_len = sizeof(struct core_reloc_ints), \ +} + +#define INTS_ERR_CASE(name) { \ + INTS_CASE_COMMON(name), \ + .fails = true, \ +} + +struct core_reloc_test_case { + const char *case_name; + const char *bpf_obj_file; + const char *btf_src_file; + const char *input; + int input_len; + const char *output; + int output_len; + bool fails; +}; + +static struct core_reloc_test_case test_cases[] = { + /* validate we can find kernel image and use its BTF for relocs */ + { + .case_name = "kernel", + .bpf_obj_file = "test_core_reloc_kernel.o", + .btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */ + .input = "", + .input_len = 0, + .output = "\1", /* true */ + .output_len = 1, + }, + + /* validate BPF program can use multiple flavors to match against + * single target BTF type + */ + FLAVORS_CASE(flavors), + + FLAVORS_ERR_CASE(flavors__err_wrong_name), + + /* various struct/enum nesting and resolution scenarios */ + NESTING_CASE(nesting), + NESTING_CASE(nesting___anon_embed), + NESTING_CASE(nesting___struct_union_mixup), + NESTING_CASE(nesting___extra_nesting), + NESTING_CASE(nesting___dup_compat_types), + + NESTING_ERR_CASE(nesting___err_missing_field), + NESTING_ERR_CASE(nesting___err_array_field), + NESTING_ERR_CASE(nesting___err_missing_container), + NESTING_ERR_CASE(nesting___err_nonstruct_container), + NESTING_ERR_CASE(nesting___err_array_container), + NESTING_ERR_CASE(nesting___err_dup_incompat_types), + NESTING_ERR_CASE(nesting___err_partial_match_dups), + NESTING_ERR_CASE(nesting___err_too_deep), + + /* various array access relocation scenarios */ + ARRAYS_CASE(arrays), + ARRAYS_CASE(arrays___diff_arr_dim), + ARRAYS_CASE(arrays___diff_arr_val_sz), + + ARRAYS_ERR_CASE(arrays___err_too_small), + ARRAYS_ERR_CASE(arrays___err_too_shallow), + ARRAYS_ERR_CASE(arrays___err_non_array), + ARRAYS_ERR_CASE(arrays___err_wrong_val_type1), + ARRAYS_ERR_CASE(arrays___err_wrong_val_type2), + + /* enum/ptr/int handling scenarios */ + PRIMITIVES_CASE(primitives), + PRIMITIVES_CASE(primitives___diff_enum_def), + PRIMITIVES_CASE(primitives___diff_func_proto), + 
PRIMITIVES_CASE(primitives___diff_ptr_type), + + PRIMITIVES_ERR_CASE(primitives___err_non_enum), + PRIMITIVES_ERR_CASE(primitives___err_non_int), + PRIMITIVES_ERR_CASE(primitives___err_non_ptr), + + /* const/volatile/restrict and typedefs scenarios */ + MODS_CASE(mods), + MODS_CASE(mods___mod_swap), + MODS_CASE(mods___typedefs), + + /* handling "ptr is an array" semantics */ + PTR_AS_ARR_CASE(ptr_as_arr), + PTR_AS_ARR_CASE(ptr_as_arr___diff_sz), + + /* int signedness/sizing/bitfield handling */ + INTS_CASE(ints), + INTS_CASE(ints___bool), + INTS_CASE(ints___reverse_sign), + + INTS_ERR_CASE(ints___err_bitfield), + INTS_ERR_CASE(ints___err_wrong_sz_8), + INTS_ERR_CASE(ints___err_wrong_sz_16), + INTS_ERR_CASE(ints___err_wrong_sz_32), + INTS_ERR_CASE(ints___err_wrong_sz_64), + + /* validate edge cases of capturing relocations */ + { + .case_name = "misc", + .bpf_obj_file = "test_core_reloc_misc.o", + .btf_src_file = "btf__core_reloc_misc.o", + .input = (const char *)&(struct core_reloc_misc_extensible[]){ + { .a = 1 }, + { .a = 2 }, /* not read */ + { .a = 3 }, + }, + .input_len = 4 * sizeof(int), + .output = STRUCT_TO_CHAR_PTR(core_reloc_misc_output) { + .a = 1, + .b = 1, + .c = 0, /* BUG in clang, should be 3 */ + }, + .output_len = sizeof(struct core_reloc_misc_output), + }, +}; + +struct data { + char in[256]; + char out[256]; +}; + +void test_core_reloc(void) +{ + const char *probe_name = "raw_tracepoint/sys_enter"; + struct bpf_object_load_attr load_attr = {}; + struct core_reloc_test_case *test_case; + int err, duration = 0, i, equal; + struct bpf_link *link = NULL; + struct bpf_map *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + const int zero = 0; + struct data data; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + test_case = &test_cases[i]; + + if (!test__start_subtest(test_case->case_name)) + continue; + + obj = bpf_object__open(test_case->bpf_obj_file); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open '%s': %ld\n", + test_case->bpf_obj_file, PTR_ERR(obj))) + continue; + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK(!prog, "find_probe", + "prog '%s' not found\n", probe_name)) + goto cleanup; + bpf_program__set_type(prog, BPF_PROG_TYPE_RAW_TRACEPOINT); + + load_attr.obj = obj; + load_attr.log_level = 0; + load_attr.target_btf_path = test_case->btf_src_file; + err = bpf_object__load_xattr(&load_attr); + if (test_case->fails) { + CHECK(!err, "obj_load_fail", + "should fail to load prog '%s'\n", probe_name); + goto cleanup; + } else { + if (CHECK(err, "obj_load", + "failed to load prog '%s': %d\n", + probe_name, err)) + goto cleanup; + } + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) + goto cleanup; + + data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto cleanup; + + memset(&data, 0, sizeof(data)); + memcpy(data.in, test_case->input, test_case->input_len); + + err = bpf_map_update_elem(bpf_map__fd(data_map), + &zero, &data, 0); + if (CHECK(err, "update_data_map", + "failed to update .data map: %d\n", err)) + goto cleanup; + + /* trigger test run */ + usleep(1); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &data); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto cleanup; + + equal = memcmp(data.out, test_case->output, + test_case->output_len) == 0; + if (CHECK(!equal, "check_result", + "input/output data 
don't match\n")) { + int j; + + for (j = 0; j < test_case->input_len; j++) { + printf("input byte #%d: 0x%02hhx\n", + j, test_case->input[j]); + } + for (j = 0; j < test_case->output_len; j++) { + printf("output byte #%d: EXP 0x%02hhx GOT 0x%02hhx\n", + j, test_case->output[j], data.out[j]); + } + goto cleanup; + } + +cleanup: + if (!IS_ERR_OR_NULL(link)) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index c938283ac232..6892b88ae065 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -5,6 +5,10 @@ #include <linux/if_tun.h> #include <sys/uio.h> +#ifndef IP_MF +#define IP_MF 0x2000 +#endif + #define CHECK_FLOW_KEYS(desc, got, expected) \ CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ desc, \ @@ -16,6 +20,7 @@ "is_encap=%u/%u " \ "ip_proto=0x%x/0x%x " \ "n_proto=0x%x/0x%x " \ + "flow_label=0x%x/0x%x " \ "sport=%u/%u " \ "dport=%u/%u\n", \ got.nhoff, expected.nhoff, \ @@ -26,6 +31,7 @@ got.is_encap, expected.is_encap, \ got.ip_proto, expected.ip_proto, \ got.n_proto, expected.n_proto, \ + got.flow_label, expected.flow_label, \ got.sport, expected.sport, \ got.dport, expected.dport) @@ -35,6 +41,13 @@ struct ipv4_pkt { struct tcphdr tcp; } __packed; +struct ipip_pkt { + struct ethhdr eth; + struct iphdr iph; + struct iphdr iph_inner; + struct tcphdr tcp; +} __packed; + struct svlan_ipv4_pkt { struct ethhdr eth; __u16 vlan_tci; @@ -49,6 +62,18 @@ struct ipv6_pkt { struct tcphdr tcp; } __packed; +struct ipv6_frag_pkt { + struct ethhdr eth; + struct ipv6hdr iph; + struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; + } ipf; + struct tcphdr tcp; +} __packed; + struct dvlan_ipv6_pkt { struct ethhdr eth; __u16 vlan_tci; @@ -64,10 +89,13 @@ struct test { union { struct ipv4_pkt ipv4; struct svlan_ipv4_pkt svlan_ipv4; + struct ipip_pkt ipip; struct ipv6_pkt ipv6; + struct ipv6_frag_pkt ipv6_frag; struct dvlan_ipv6_pkt dvlan_ipv6; } pkt; struct bpf_flow_keys keys; + __u32 flags; }; #define VLAN_HLEN 4 @@ -81,6 +109,8 @@ struct test tests[] = { .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, @@ -88,6 +118,8 @@ struct test tests[] = { .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), + .sport = 80, + .dport = 8080, }, }, { @@ -97,6 +129,8 @@ struct test tests[] = { .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, @@ -104,6 +138,8 @@ struct test tests[] = { .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, }, }, { @@ -115,6 +151,8 @@ struct test tests[] = { .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN + VLAN_HLEN, @@ -122,6 +160,8 @@ struct test tests[] = { .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), + .sport = 80, + .dport = 8080, }, }, { @@ -133,6 +173,8 @@ struct test tests[] = { .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + 
.tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN + VLAN_HLEN * 2, @@ -141,8 +183,206 @@ struct test tests[] = { .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, }, }, + { + .name = "ipv4-frag", + .pkt.ipv4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.frag_off = __bpf_constant_htons(IP_MF), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_frag = true, + .is_first_frag = true, + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + }, + { + .name = "ipv4-no-frag", + .pkt.ipv4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.frag_off = __bpf_constant_htons(IP_MF), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_frag = true, + .is_first_frag = true, + }, + }, + { + .name = "ipv6-frag", + .pkt.ipv6_frag = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_FRAGMENT, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .ipf.nexthdr = IPPROTO_TCP, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr) + + sizeof(struct frag_hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .is_frag = true, + .is_first_frag = true, + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + }, + { + .name = "ipv6-no-frag", + .pkt.ipv6_frag = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_FRAGMENT, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .ipf.nexthdr = IPPROTO_TCP, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr) + + sizeof(struct frag_hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .is_frag = true, + .is_first_frag = true, + }, + }, + { + .name = "ipv6-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0xb, 0xee, 0xef }, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, + .flow_label = __bpf_constant_htonl(0xbeeef), + }, + }, + { + .name = "ipv6-no-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0xb, 0xee, 0xef }, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = 
BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .flow_label = __bpf_constant_htonl(0xbeeef), + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + }, + { + .name = "ipip-encap", + .pkt.ipip = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_IPIP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES) - + sizeof(struct iphdr), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr) + + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + .sport = 80, + .dport = 8080, + }, + }, + { + .name = "ipip-no-encap", + .pkt.ipip = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_IPIP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES) - + sizeof(struct iphdr), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_IPIP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + }, }; static int create_tap(const char *ifname) @@ -225,6 +465,13 @@ void test_flow_dissector(void) .data_size_in = sizeof(tests[i].pkt), .data_out = &flow_keys, }; + static struct bpf_flow_keys ctx = {}; + + if (tests[i].flags) { + tattr.ctx_in = &ctx; + tattr.ctx_size_in = sizeof(ctx); + ctx.flags = tests[i].flags; + } err = bpf_prog_test_run_xattr(&tattr); CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) || @@ -251,9 +498,20 @@ void test_flow_dissector(void) CHECK(err, "ifup", "err %d errno %d\n", err, errno); for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct bpf_flow_keys flow_keys = {}; + /* Keep in sync with 'flags' from eth_get_headlen. */ + __u32 eth_get_headlen_flags = + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; struct bpf_prog_test_run_attr tattr = {}; - __u32 key = 0; + struct bpf_flow_keys flow_keys = {}; + __u32 key = (__u32)(tests[i].keys.sport) << 16 | + tests[i].keys.dport; + + /* For skb-less case we can't pass input flags; run + * only the tests that have a matching set of flags.
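+ * (flow_keys.flags is part of the struct that CHECK_FLOW_KEYS + * memcmp()s, so expectations built with a different set of flags + * could never match byte-for-byte here.)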
+ */ + + if (tests[i].flags != eth_get_headlen_flags) + continue; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); @@ -263,6 +521,9 @@ void test_flow_dissector(void) CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + + err = bpf_map_delete_elem(keys_fd, &key); + CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err); } bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index c2a0a9d5591b..3d59b3c841fe 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -1,8 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <sched.h> +#include <sys/socket.h> #include <test_progs.h> #define MAX_CNT_RAWTP 10ull #define MAX_STACK_RAWTP 100 + +static int duration = 0; + struct get_stack_trace_t { int pid; int kern_stack_size; @@ -13,7 +20,7 @@ struct get_stack_trace_t { struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; }; -static int get_stack_print_output(void *data, int size) +static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) { bool good_kern_stack = false, good_user_stack = false; const char *nonjit_func = "___bpf_prog_run"; @@ -34,7 +41,7 @@ static int get_stack_print_output(void *data, int size) * just assume it is good if the stack is not empty. * This could be improved in the future. */ - if (jit_enabled) { + if (env.jit_enabled) { found = num_stack > 0; } else { for (i = 0; i < num_stack; i++) { @@ -51,7 +58,7 @@ static int get_stack_print_output(void *data, int size) } } else { num_stack = e->kern_stack_size / sizeof(__u64); - if (jit_enabled) { + if (env.jit_enabled) { good_kern_stack = num_stack > 0; } else { for (i = 0; i < num_stack; i++) { @@ -65,75 +72,76 @@ static int get_stack_print_output(void *data, int size) if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) good_user_stack = true; } - if (!good_kern_stack || !good_user_stack) - return LIBBPF_PERF_EVENT_ERROR; - if (cnt == MAX_CNT_RAWTP) - return LIBBPF_PERF_EVENT_DONE; - - return LIBBPF_PERF_EVENT_CONT; + if (!good_kern_stack) + CHECK(!good_kern_stack, "kern_stack", "corrupted kernel stack\n"); + if (!good_user_stack) + CHECK(!good_user_stack, "user_stack", "corrupted user stack\n"); } void test_get_stack_raw_tp(void) { const char *file = "./test_get_stack_rawtp.o"; - int i, efd, err, prog_fd, pmu_fd, perfmap_fd; - struct perf_event_attr attr = {}; + const char *prog_name = "raw_tracepoint/sys_enter"; + int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb = NULL; + struct bpf_link *link = NULL; struct timespec tv = {0, 10}; - __u32 key = 0, duration = 0; + struct bpf_program *prog; struct bpf_object *obj; + struct bpf_map *map; + cpu_set_t cpu_set; err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); - if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) goto close_prog; - perfmap_fd = bpf_find_map(__func__, obj, "perfmap"); - if 
(CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n", - perfmap_fd, errno)) + map = bpf_object__find_map_by_name(obj, "perfmap"); + if (CHECK(!map, "bpf_find_map", "not found\n")) goto close_prog; err = load_kallsyms(); if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno)) goto close_prog; - attr.sample_type = PERF_SAMPLE_RAW; - attr.type = PERF_TYPE_SOFTWARE; - attr.config = PERF_COUNT_SW_BPF_OUTPUT; - pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/, - -1/*group_fd*/, 0); - if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, - errno)) + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno)) goto close_prog; - err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY); - if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err, - errno)) + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) goto close_prog; - err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n", - err, errno)) - goto close_prog; - - err = perf_event_mmap(pmu_fd); - if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno)) + pb_opts.sample_cb = get_stack_print_output; + pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto close_prog; /* trigger some syscall action */ for (i = 0; i < MAX_CNT_RAWTP; i++) nanosleep(&tv, NULL); - err = perf_event_poller(pmu_fd, get_stack_print_output); - if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno)) - goto close_prog; + while (exp_cnt > 0) { + err = perf_buffer__poll(pb, 100); + if (err < 0 && CHECK(err < 0, "pb__poll", "err %d\n", err)) + goto close_prog; + exp_cnt -= err; + } goto close_prog_noerr; close_prog: error_cnt++; close_prog_noerr: + if (!IS_ERR_OR_NULL(link)) + bpf_link__destroy(link); + if (!IS_ERR_OR_NULL(pb)) + perf_buffer__free(pb); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 5633be43828f..4a4f428d1a78 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -1,15 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -static int libbpf_debug_print(enum libbpf_print_level level, - const char *format, va_list args) -{ - if (level == LIBBPF_DEBUG) - return 0; - - return vfprintf(stderr, format, args); -} - void test_reference_tracking(void) { const char *file = "./test_sk_lookup_kern.o"; @@ -36,9 +27,11 @@ void test_reference_tracking(void) /* Expect verifier failure if test name has 'fail' */ if (strstr(title, "fail") != NULL) { - libbpf_set_print(NULL); + libbpf_print_fn_t old_print_fn; + + old_print_fn = libbpf_set_print(NULL); err = !bpf_program__load(prog, "GPL", 0); - libbpf_set_print(libbpf_debug_print); + libbpf_set_print(old_print_fn); } else { err = bpf_program__load(prog, "GPL", 0); } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 54218ee3c004..1575f0a1f586 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -203,7 +203,7 @@ static int 
test_send_signal_nmi(void) if (pmu_fd == -1) { if (errno == ENOENT) { printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", - __func__); + __func__); return 0; } /* Let the test fail with a more informative message */ @@ -219,11 +219,10 @@ void test_send_signal(void) { int ret = 0; - ret |= test_send_signal_tracepoint(); - ret |= test_send_signal_perf(); - ret |= test_send_signal_nmi(); - if (!ret) - printf("test_send_signal:OK\n"); - else - printf("test_send_signal:FAIL\n"); + if (test__start_subtest("send_signal_tracepoint")) + ret |= test_send_signal_tracepoint(); + if (test__start_subtest("send_signal_perf")) + ret |= test_send_signal_perf(); + if (test__start_subtest("send_signal_nmi")) + ret |= test_send_signal_nmi(); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index 09e6b46f5515..15f7c272edb0 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -75,7 +75,8 @@ void test_xdp_noinline(void) } if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { error_cnt++; - printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", + bytes, pkts); } out: bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 5ae485a6af3f..040a44206f29 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -65,8 +65,8 @@ struct { } jmp_table SEC(".maps"); struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); __type(key, __u32); __type(value, struct bpf_flow_keys); } last_dissection SEC(".maps"); @@ -74,15 +74,20 @@ struct { static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, int ret) { - struct bpf_flow_keys *val; - __u32 key = 0; + __u32 key = (__u32)(keys->sport) << 16 | keys->dport; + struct bpf_flow_keys val; - val = bpf_map_lookup_elem(&last_dissection, &key); - if (val) - memcpy(val, keys, sizeof(*val)); + memcpy(&val, keys, sizeof(val)); + bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY); return ret; } +#define IPV6_FLOWLABEL_MASK __bpf_constant_htonl(0x000FFFFF) +static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) +{ + return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; +} + static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, __u16 hdr_size, void *buffer) @@ -153,7 +158,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) struct tcphdr *tcp, _tcp; struct udphdr *udp, _udp; - keys->ip_proto = proto; switch (proto) { case IPPROTO_ICMP: icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); @@ -162,9 +166,15 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) return export_flow_keys(keys, BPF_OK); case IPPROTO_IPIP: keys->is_encap = true; + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return export_flow_keys(keys, BPF_OK); + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); case IPPROTO_IPV6: keys->is_encap = true; + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return export_flow_keys(keys, BPF_OK); + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); case IPPROTO_GRE: gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); @@ -184,6 +194,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) 
keys->thoff += 4; /* Step over sequence number */ keys->is_encap = true; + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return export_flow_keys(keys, BPF_OK); if (gre->proto == bpf_htons(ETH_P_TEB)) { eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), @@ -231,7 +243,6 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) { struct bpf_flow_keys *keys = skb->flow_keys; - keys->ip_proto = nexthdr; switch (nexthdr) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: @@ -266,6 +277,7 @@ PROG(IP)(struct __sk_buff *skb) keys->addr_proto = ETH_P_IP; keys->ipv4_src = iph->saddr; keys->ipv4_dst = iph->daddr; + keys->ip_proto = iph->protocol; keys->thoff += iph->ihl << 2; if (data + keys->thoff > data_end) @@ -273,13 +285,20 @@ PROG(IP)(struct __sk_buff *skb) if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { keys->is_frag = true; - if (iph->frag_off & bpf_htons(IP_OFFSET)) + if (iph->frag_off & bpf_htons(IP_OFFSET)) { /* From second fragment on, packets do not have headers * we can parse. */ done = true; - else + } else { keys->is_first_frag = true; + /* No need to parse fragmented packet unless + * explicitly asked for. + */ + if (!(keys->flags & + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) + done = true; + } } if (done) @@ -301,6 +320,11 @@ PROG(IPV6)(struct __sk_buff *skb) memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); keys->thoff += sizeof(struct ipv6hdr); + keys->ip_proto = ip6h->nexthdr; + keys->flow_label = ip6_flowlabel(ip6h); + + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) + return export_flow_keys(keys, BPF_OK); return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -317,7 +341,8 @@ PROG(IPV6OP)(struct __sk_buff *skb) /* hlen is in 8-octets and does not include the first 8 bytes * of the header */ - skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; + keys->thoff += (1 + ip6h->hdrlen) << 3; + keys->ip_proto = ip6h->nexthdr; return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -333,9 +358,18 @@ PROG(IPV6FR)(struct __sk_buff *skb) keys->thoff += sizeof(*fragh); keys->is_frag = true; - if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->ip_proto = fragh->nexthdr; + + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) { keys->is_first_frag = true; + /* No need to parse fragmented packet unless + * explicitly asked for. 
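+ * The early return below still exports the keys, so callers + * see is_frag/is_first_frag even though no L4 ports were parsed.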
+ */ + if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) + return export_flow_keys(keys, BPF_OK); + } + return parse_ipv6_proto(skb, fragh->nexthdr); } diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c new file mode 100644 index 000000000000..018ed7fbba3a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c new file mode 100644 index 000000000000..13d662c57014 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___diff_arr_dim x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c new file mode 100644 index 000000000000..a351f418c85d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___diff_arr_val_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c new file mode 100644 index 000000000000..a8735009becc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_non_array x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c new file mode 100644 index 000000000000..2a67c28b1e75 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_too_shallow x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c new file mode 100644 index 000000000000..1142c08c925f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_too_small x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c new file mode 100644 index 000000000000..795a5b729176 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_wrong_val_type1 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c new file mode 100644 index 000000000000..3af74b837c4d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_wrong_val_type2 x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c new file mode 100644 index 000000000000..b74455b91227 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_flavors x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c new file mode 100644 index 000000000000..7b6035f86ee6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_flavors__err_wrong_name x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c new file mode 100644 index 000000000000..7d0f041042c5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c new file mode 100644 index 000000000000..f9359450186e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___bool x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_bitfield.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_bitfield.c new file mode 100644 index 000000000000..50369e8320a0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_bitfield.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___err_bitfield x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_16.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_16.c new file mode 100644 index 000000000000..823bac13d641 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_16.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___err_wrong_sz_16 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_32.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_32.c new file mode 100644 index 000000000000..b44f3be18535 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_32.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___err_wrong_sz_32 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_64.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_64.c new file mode 100644 index 000000000000..9a3dd2099c0f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_64.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___err_wrong_sz_64 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_8.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_8.c new file mode 100644 index 000000000000..9f11ef5f6e88 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_8.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct 
core_reloc_ints___err_wrong_sz_8 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c new file mode 100644 index 000000000000..aafb1c5819d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___reverse_sign x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c new file mode 100644 index 000000000000..ed9ad8b5b4f8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c @@ -0,0 +1,5 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_misc___a x) {} +void f2(struct core_reloc_misc___b x) {} +void f3(struct core_reloc_misc_extensible x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c new file mode 100644 index 000000000000..124197a2e813 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_mods x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c new file mode 100644 index 000000000000..f8a6592ca75f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_mods___mod_swap x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c new file mode 100644 index 000000000000..5c0d73687247 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_mods___typedefs x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c new file mode 100644 index 000000000000..4480fcc0f183 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c new file mode 100644 index 000000000000..13e108f76ece --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___anon_embed x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c new file mode 100644 index 000000000000..76b54fda5fbb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c @@ -0,0 +1,5 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_nesting___dup_compat_types x) {} +void f2(struct core_reloc_nesting___dup_compat_types__2 x) {} +void f3(struct core_reloc_nesting___dup_compat_types__3 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c new file mode 100644 index 
000000000000..975fb95db810 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_array_container x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c new file mode 100644 index 000000000000..ad66c67e7980 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_array_field x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c new file mode 100644 index 000000000000..35c5f8da6812 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_nesting___err_dup_incompat_types__1 x) {} +void f2(struct core_reloc_nesting___err_dup_incompat_types__2 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c new file mode 100644 index 000000000000..142e332041db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_missing_container x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c new file mode 100644 index 000000000000..efcae167fab9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_missing_field x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c new file mode 100644 index 000000000000..97aaaedd8ada --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_nonstruct_container x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c new file mode 100644 index 000000000000..ffde35086e90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_nesting___err_partial_match_dups__a x) {} +void f2(struct core_reloc_nesting___err_partial_match_dups__b x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c new file mode 100644 index 000000000000..39a2fadd8e95 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_too_deep x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c new file mode 100644 index 000000000000..a09d9dfb20df --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___extra_nesting x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c new file mode 100644 index 000000000000..3d8a1a74012f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___struct_union_mixup x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c new file mode 100644 index 000000000000..96b90e39242a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c new file mode 100644 index 000000000000..6e87233a3ed0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___diff_enum_def x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c new file mode 100644 index 000000000000..d9f48e80b9d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___diff_func_proto x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c new file mode 100644 index 000000000000..c718f75f8f3b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___diff_ptr_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c new file mode 100644 index 000000000000..b8a120830891 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___err_non_enum x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c new file mode 100644 index 000000000000..ad8b3c9aa76f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___err_non_int x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c new file mode 100644 index 000000000000..e20bc1d42d0a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___err_non_ptr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c new file mode 100644 index 000000000000..8da52432ba17 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ptr_as_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c new file mode 100644 index 000000000000..003acfc9a3e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ptr_as_arr___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h new file mode 100644 index 000000000000..f686a8138d90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -0,0 +1,667 @@ +#include <stdint.h> +#include <stdbool.h> + +/* + * FLAVORS + */ +struct core_reloc_flavors { + int a; + int b; + int c; +}; + +/* this is not a flavor, as it doesn't have triple underscore */ +struct core_reloc_flavors__err_wrong_name { + int a; + int b; + int c; +}; + +/* + * NESTING + */ +/* original set up, used to record relocations in BPF program */ +struct core_reloc_nesting_substruct { + int a; +}; + +union core_reloc_nesting_subunion { + int b; +}; + +struct core_reloc_nesting { + union { + struct core_reloc_nesting_substruct a; + } a; + struct { + union core_reloc_nesting_subunion b; + } b; +}; + +/* inlined anonymous struct/union instead of named structs in original */ +struct core_reloc_nesting___anon_embed { + int __just_for_padding; + union { + struct { + int a; + } a; + } a; + struct { + union { + int b; + } b; + } b; +}; + +/* different mix of nested structs/unions than in original */ +struct core_reloc_nesting___struct_union_mixup { + int __a; + struct { + int __a; + union { + char __a; + int a; + } a; + } a; + int __b; + union { + int __b; + union { + char __b; + int b; + } b; + } b; +}; + +/* extra anon structs/unions, but still valid a.a.a and b.b.b accessors */ +struct core_reloc_nesting___extra_nesting { + int __padding; + struct { + struct { + struct { + struct { + union { + int a; + } a; + }; + }; + } a; + int __some_more; + struct { + union { + union { + union { + struct { + int b; + }; + } b; + }; + } b; + }; + }; +}; + +/* three flavors of same struct with different structure but same layout for + * a.a.a and b.b.b, thus successfully resolved and relocatable */ +struct core_reloc_nesting___dup_compat_types { + char __just_for_padding; + /* 3 more bytes of padding */ + struct { + struct { + int a; /* offset 4 */ + } a; + } a; + long long __more_padding; + struct { + struct { + int b; /* offset 16 */ + } b; + } b; +}; + +struct core_reloc_nesting___dup_compat_types__2 { + int __aligned_padding; + struct { + int __trickier_noop[0]; + struct { + char __some_more_noops[0]; + int a; /* offset 4 */ + } a; + } a; + int __more_padding; + struct { + struct { + struct { + int __critical_padding; + int b; /* offset 16 
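(the enclosing b struct starts at offset 12 and __critical_padding fills bytes 12..15)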
*/ + } b; + int __does_not_matter; + }; + } b; + int __more_irrelevant_stuff; +}; + +struct core_reloc_nesting___dup_compat_types__3 { + char __correct_padding[4]; + struct { + struct { + int a; /* offset 4 */ + } a; + } a; + /* 8 byte padding due to next struct's alignment */ + struct { + struct { + int b; + } b; + } b __attribute__((aligned(16))); +}; + +/* b.b.b field is missing */ +struct core_reloc_nesting___err_missing_field { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int x; + } b; + } b; +}; + +/* b.b.b field is an array of integers instead of plain int */ +struct core_reloc_nesting___err_array_field { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int b[1]; + } b; + } b; +}; + +/* middle b container is missing */ +struct core_reloc_nesting___err_missing_container { + struct { + struct { + int a; + } a; + } a; + struct { + int x; + } b; +}; + +/* middle b container is referenced through pointer instead of being embedded */ +struct core_reloc_nesting___err_nonstruct_container { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int b; + } *b; + } b; +}; + +/* middle b container is an array of structs instead of plain struct */ +struct core_reloc_nesting___err_array_container { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int b; + } b[1]; + } b; +}; + +/* two flavors of same struct with incompatible layout for b.b.b */ +struct core_reloc_nesting___err_dup_incompat_types__1 { + struct { + struct { + int a; /* offset 0 */ + } a; + } a; + struct { + struct { + int b; /* offset 4 */ + } b; + } b; +}; + +struct core_reloc_nesting___err_dup_incompat_types__2 { + struct { + struct { + int a; /* offset 0 */ + } a; + } a; + int __extra_padding; + struct { + struct { + int b; /* offset 8 (!) 
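-- clashes with flavor __1 above, which places b.b.b at offset 4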
*/ + } b; + } b; +}; + +/* two flavors of same struct having one of a.a.a and b.b.b, but not both */ +struct core_reloc_nesting___err_partial_match_dups__a { + struct { + struct { + int a; + } a; + } a; +}; + +struct core_reloc_nesting___err_partial_match_dups__b { + struct { + struct { + int b; + } b; + } b; +}; + +struct core_reloc_nesting___err_too_deep { + struct { + struct { + int a; + } a; + } a; + /* 65 levels of nestedness for b.b.b */ + struct { + struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + /* this one is one too much */ + struct { + int b; + }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + } b; + } b; +}; + +/* + * ARRAYS + */ +struct core_reloc_arrays_output { + int a2; + char b123; + int c1c; + int d00d; +}; + +struct core_reloc_arrays_substruct { + int c; + int d; +}; + +struct core_reloc_arrays { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +/* bigger array dimensions */ +struct core_reloc_arrays___diff_arr_dim { + int a[7]; + char b[3][4][5]; + struct core_reloc_arrays_substruct c[4]; + struct core_reloc_arrays_substruct d[2][3]; +}; + +/* different size of array's value (struct) */ +struct core_reloc_arrays___diff_arr_val_sz { + int a[5]; + char b[2][3][4]; + struct { + int __padding1; + int c; + int __padding2; + } c[3]; + struct { + int __padding1; + int d; + int __padding2; + } d[1][2]; +}; + +struct core_reloc_arrays___err_too_small { + int a[2]; /* this one is too small */ + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +struct core_reloc_arrays___err_too_shallow { + int a[5]; + char b[2][3]; /* this one lacks one dimension */ + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +struct core_reloc_arrays___err_non_array { + int a; /* not an array */ + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +struct core_reloc_arrays___err_wrong_val_type1 { + char a[5]; /* char instead of int */ + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +struct core_reloc_arrays___err_wrong_val_type2 { + int a[5]; + char b[2][3][4]; + int c[3]; /* value is not a struct */ + struct core_reloc_arrays_substruct d[1][2]; +}; + +/* + * PRIMITIVES + */ +enum core_reloc_primitives_enum { + A = 0, + B = 1, +}; + +struct core_reloc_primitives { + char a; + int b; + enum core_reloc_primitives_enum c; + void *d; + int (*f)(const char *); +}; + +struct core_reloc_primitives___diff_enum_def { + char a; + int b; + void *d; + int (*f)(const char *); + enum { + X = 100, + Y = 200, + } c; /* inline enum def with differing set of values */ +}; + +struct core_reloc_primitives___diff_func_proto { + void (*f)(int); /* 
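unlike the original int (*f)(const char *), this is an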
incompatible function prototype */ + void *d; + enum core_reloc_primitives_enum c; + int b; + char a; +}; + +struct core_reloc_primitives___diff_ptr_type { + const char * const d; /* different pointee type + modifiers */ + char a; + int b; + enum core_reloc_primitives_enum c; + int (*f)(const char *); +}; + +struct core_reloc_primitives___err_non_enum { + char a[1]; + int b; + int c; /* int instead of enum */ + void *d; + int (*f)(const char *); +}; + +struct core_reloc_primitives___err_non_int { + char a[1]; + int *b; /* ptr instead of int */ + enum core_reloc_primitives_enum c; + void *d; + int (*f)(const char *); +}; + +struct core_reloc_primitives___err_non_ptr { + char a[1]; + int b; + enum core_reloc_primitives_enum c; + int d; /* int instead of ptr */ + int (*f)(const char *); +}; + +/* + * MODS + */ +struct core_reloc_mods_output { + int a, b, c, d, e, f, g, h; +}; + +typedef const int int_t; +typedef const char *char_ptr_t; +typedef const int arr_t[7]; + +struct core_reloc_mods_substruct { + int x; + int y; +}; + +typedef struct { + int x; + int y; +} core_reloc_mods_substruct_t; + +struct core_reloc_mods { + int a; + int_t b; + char *c; + char_ptr_t d; + int e[3]; + arr_t f; + struct core_reloc_mods_substruct g; + core_reloc_mods_substruct_t h; +}; + +/* a/b, c/d, e/f, and g/h pairs are swapped */ +struct core_reloc_mods___mod_swap { + int b; + int_t a; + char *d; + char_ptr_t c; + int f[3]; + arr_t e; + struct { + int y; + int x; + } h; + core_reloc_mods_substruct_t g; +}; + +typedef int int1_t; +typedef int1_t int2_t; +typedef int2_t int3_t; + +typedef int arr1_t[5]; +typedef arr1_t arr2_t; +typedef arr2_t arr3_t; +typedef arr3_t arr4_t; + +typedef const char * const volatile fancy_char_ptr_t; + +typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt; + +/* we need more typedefs */ +struct core_reloc_mods___typedefs { + core_reloc_mods_substruct_tt g; + core_reloc_mods_substruct_tt h; + arr4_t f; + arr4_t e; + fancy_char_ptr_t d; + fancy_char_ptr_t c; + int3_t b; + int3_t a; +}; + +/* + * PTR_AS_ARR + */ +struct core_reloc_ptr_as_arr { + int a; +}; + +struct core_reloc_ptr_as_arr___diff_sz { + int :32; /* padding */ + char __some_more_padding; + int a; +}; + +/* + * INTS + */ +struct core_reloc_ints { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +/* signed/unsigned types swap */ +struct core_reloc_ints___reverse_sign { + int8_t u8_field; + uint8_t s8_field; + int16_t u16_field; + uint16_t s16_field; + int32_t u32_field; + uint32_t s32_field; + int64_t u64_field; + uint64_t s64_field; +}; + +struct core_reloc_ints___bool { + bool u8_field; /* bool instead of uint8 */ + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +struct core_reloc_ints___err_bitfield { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field: 32; /* bitfields are not supported */ + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +struct core_reloc_ints___err_wrong_sz_8 { + uint16_t u8_field; /* not 8-bit anymore */ + int16_t s8_field; /* not 8-bit anymore */ + + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +struct core_reloc_ints___err_wrong_sz_16 { + uint8_t u8_field; + int8_t s8_field; + + uint32_t u16_field; /* not 
16-bit anymore */ + int32_t s16_field; /* not 16-bit anymore */ + + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +struct core_reloc_ints___err_wrong_sz_32 { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + + uint64_t u32_field; /* not 32-bit anymore */ + int64_t s32_field; /* not 32-bit anymore */ + + uint64_t u64_field; + int64_t s64_field; +}; + +struct core_reloc_ints___err_wrong_sz_64 { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + + uint32_t u64_field; /* not 64-bit anymore */ + int32_t s64_field; /* not 64-bit anymore */ +}; + +/* + * MISC + */ +struct core_reloc_misc_output { + int a, b, c; +}; + +struct core_reloc_misc___a { + int a1; + int a2; +}; + +struct core_reloc_misc___b { + int b1; + int b2; +}; + +/* this one extends core_reloc_misc_extensible struct from BPF prog */ +struct core_reloc_misc_extensible { + int a; + int b; + int c; + int d; +}; diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c new file mode 100644 index 000000000000..650859022771 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop4.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +SEC("socket") +int combinations(volatile struct __sk_buff* skb) +{ + int ret = 0, i; + +#pragma nounroll + for (i = 0; i < 20; i++) + if (skb->len) + ret |= 1 << i; + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c new file mode 100644 index 000000000000..28d1d668f07c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop5.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include "bpf_helpers.h" +#define barrier() __asm__ __volatile__("": : :"memory") + +char _license[] SEC("license") = "GPL"; + +SEC("socket") +int while_true(volatile struct __sk_buff* skb) +{ + int i = 0; + + while (1) { + if (skb->len) + i += 3; + else + i += 7; + if (i == 9) + break; + barrier(); + if (i == 10) + break; + barrier(); + if (i == 13) + break; + barrier(); + if (i == 14) + break; + } + return i; +} diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 076122c898e9..9a3d1c79e6fe 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <string.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <linux/bpf.h> #include "bpf_helpers.h" @@ -42,6 +44,14 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { + /* Not interested in SOL_TCP:TCP_CONGESTION; + * let next BPF program in the cgroup chain or kernel + * handle it. 
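+ * (Returning 1 passes the kernel-provided value through + * unchanged; returning 0 would fail the call with EPERM, as + * elsewhere in this program.)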
+ */ + return 1; + } + if (ctx->level != SOL_CUSTOM) return 0; /* EPERM, deny everything except custom level */ @@ -91,6 +101,18 @@ int _setsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { + /* Always use cubic */ + + if (optval + 5 > optval_end) + return 0; /* EPERM, bounds check */ + + memcpy(optval, "cubic", 5); + ctx->optlen = 5; + + return 1; + } + if (ctx->level != SOL_CUSTOM) return 0; /* EPERM, deny everything except custom level */ diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c new file mode 100644 index 000000000000..bf67f0fdf743 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_arrays_output { + int a2; + char b123; + int c1c; + int d00d; +}; + +struct core_reloc_arrays_substruct { + int c; + int d; +}; + +struct core_reloc_arrays { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_arrays(void *ctx) +{ + struct core_reloc_arrays *in = (void *)&data.in; + struct core_reloc_arrays_output *out = (void *)&data.out; + + /* in->a[2] */ + if (BPF_CORE_READ(&out->a2, &in->a[2])) + return 1; + /* in->b[1][2][3] */ + if (BPF_CORE_READ(&out->b123, &in->b[1][2][3])) + return 1; + /* in->c[1].c */ + if (BPF_CORE_READ(&out->c1c, &in->c[1].c)) + return 1; + /* in->d[0][0].d */ + if (BPF_CORE_READ(&out->d00d, &in->d[0][0].d)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c new file mode 100644 index 000000000000..9fda73e87972 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_flavors { + int a; + int b; + int c; +}; + +/* local flavor with reversed layout */ +struct core_reloc_flavors___reversed { + int c; + int b; + int a; +}; + +/* local flavor with nested/overlapping layout */ +struct core_reloc_flavors___weird { + struct { + int b; + }; + /* a and c overlap in local flavor, but this should still work + * correctly with target original flavor + */ + union { + int a; + int c; + }; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_flavors(void *ctx) +{ + struct core_reloc_flavors *in_orig = (void *)&data.in; + struct core_reloc_flavors___reversed *in_rev = (void *)&data.in; + struct core_reloc_flavors___weird *in_weird = (void *)&data.in; + struct core_reloc_flavors *out = (void *)&data.out; + + /* read a using weird layout */ + if (BPF_CORE_READ(&out->a, &in_weird->a)) + return 1; + /* read b using reversed layout */ + if (BPF_CORE_READ(&out->b, &in_rev->b)) + return 1; + /* read c using original layout */ + if (BPF_CORE_READ(&out->c, &in_orig->c)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c 
b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c new file mode 100644 index 000000000000..d99233c8008a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_ints { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_ints(void *ctx) +{ + struct core_reloc_ints *in = (void *)&data.in; + struct core_reloc_ints *out = (void *)&data.out; + + if (BPF_CORE_READ(&out->u8_field, &in->u8_field) || + BPF_CORE_READ(&out->s8_field, &in->s8_field) || + BPF_CORE_READ(&out->u16_field, &in->u16_field) || + BPF_CORE_READ(&out->s16_field, &in->s16_field) || + BPF_CORE_READ(&out->u32_field, &in->u32_field) || + BPF_CORE_READ(&out->s32_field, &in->s32_field) || + BPF_CORE_READ(&out->u64_field, &in->u64_field) || + BPF_CORE_READ(&out->s64_field, &in->s64_field)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c new file mode 100644 index 000000000000..37e02aa3f0c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct task_struct { + int pid; + int tgid; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_kernel(void *ctx) +{ + struct task_struct *task = (void *)bpf_get_current_task(); + uint64_t pid_tgid = bpf_get_current_pid_tgid(); + int pid, tgid; + + if (BPF_CORE_READ(&pid, &task->pid) || + BPF_CORE_READ(&tgid, &task->tgid)) + return 1; + + /* validate pid + tgid match: the helper packs tgid into the upper 32 bits */ + data.out[0] = (((uint64_t)tgid << 32) | pid) == pid_tgid; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c new file mode 100644 index 000000000000..c59984bd3e23 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_misc_output { + int a, b, c; +}; + +struct core_reloc_misc___a { + int a1; + int a2; +}; + +struct core_reloc_misc___b { + int b1; + int b2; +}; + +/* fixed two first members, can be extended with new fields */ +struct core_reloc_misc_extensible { + int a; + int b; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_misc(void *ctx) +{ + struct core_reloc_misc___a *in_a = (void *)&data.in; + struct core_reloc_misc___b *in_b = (void *)&data.in; + struct core_reloc_misc_extensible *in_ext = (void *)&data.in; + struct core_reloc_misc_output *out = (void *)&data.out; + + /* record two different relocations with the same accessor string */ + if (BPF_CORE_READ(&out->a, &in_a->a1) || /* accessor: 0:0 */ +
BPF_CORE_READ(&out->b, &in_b->b1)) /* accessor: 0:0 */ + return 1; + + /* Validate relocations capture array-only accesses for structs with + * fixed header, but with potentially extendable tail. This will read + * first 4 bytes of 2nd element of in_ext array of potentially + * variably sized struct core_reloc_misc_extensible. */ + if (BPF_CORE_READ(&out->c, &in_ext[2])) /* accessor: 2 */ + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c new file mode 100644 index 000000000000..f98b942c062b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_mods_output { + int a, b, c, d, e, f, g, h; +}; + +typedef const int int_t; +typedef const char *char_ptr_t; +typedef const int arr_t[7]; + +struct core_reloc_mods_substruct { + int x; + int y; +}; + +typedef struct { + int x; + int y; +} core_reloc_mods_substruct_t; + +struct core_reloc_mods { + int a; + int_t b; + char *c; + char_ptr_t d; + int e[3]; + arr_t f; + struct core_reloc_mods_substruct g; + core_reloc_mods_substruct_t h; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_mods(void *ctx) +{ + struct core_reloc_mods *in = (void *)&data.in; + struct core_reloc_mods_output *out = (void *)&data.out; + + if (BPF_CORE_READ(&out->a, &in->a) || + BPF_CORE_READ(&out->b, &in->b) || + BPF_CORE_READ(&out->c, &in->c) || + BPF_CORE_READ(&out->d, &in->d) || + BPF_CORE_READ(&out->e, &in->e[2]) || + BPF_CORE_READ(&out->f, &in->f[1]) || + BPF_CORE_READ(&out->g, &in->g.x) || + BPF_CORE_READ(&out->h, &in->h.y)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c new file mode 100644 index 000000000000..3ca30cec2b39 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_nesting_substruct { + int a; +}; + +union core_reloc_nesting_subunion { + int b; +}; + +/* int a.a.a and b.b.b accesses */ +struct core_reloc_nesting { + union { + struct core_reloc_nesting_substruct a; + } a; + struct { + union core_reloc_nesting_subunion b; + } b; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_nesting(void *ctx) +{ + struct core_reloc_nesting *in = (void *)&data.in; + struct core_reloc_nesting *out = (void *)&data.out; + + if (BPF_CORE_READ(&out->a.a.a, &in->a.a.a)) + return 1; + if (BPF_CORE_READ(&out->b.b.b, &in->b.b.b)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c new file mode 100644 index 000000000000..add52f23ab35 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") 
= "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +enum core_reloc_primitives_enum { + A = 0, + B = 1, +}; + +struct core_reloc_primitives { + char a; + int b; + enum core_reloc_primitives_enum c; + void *d; + int (*f)(const char *); +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_primitives(void *ctx) +{ + struct core_reloc_primitives *in = (void *)&data.in; + struct core_reloc_primitives *out = (void *)&data.out; + + if (BPF_CORE_READ(&out->a, &in->a) || + BPF_CORE_READ(&out->b, &in->b) || + BPF_CORE_READ(&out->c, &in->c) || + BPF_CORE_READ(&out->d, &in->d) || + BPF_CORE_READ(&out->f, &in->f)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c new file mode 100644 index 000000000000..526b7ddc7ea1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_ptr_as_arr { + int a; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_ptr_as_arr(void *ctx) +{ + struct core_reloc_ptr_as_arr *in = (void *)&data.in; + struct core_reloc_ptr_as_arr *out = (void *)&data.out; + + if (BPF_CORE_READ(&out->a, &in[2].a)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index 33254b771384..f8ffa3f3d44b 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -55,7 +55,7 @@ struct { __type(value, raw_stack_trace_t); } rawdata_map SEC(".maps"); -SEC("tracepoint/raw_syscalls/sys_enter") +SEC("raw_tracepoint/sys_enter") int bpf_prog1(void *ctx) { int max_len, max_buildid_len, usize, ksize, total_size; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index 1ab095bcacd8..d8803dfa8d32 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -19,10 +19,29 @@ struct bpf_map_def SEC("maps") results = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = 1, + .value_size = sizeof(__u32), + .max_entries = 3, }; +static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk, + void *iph, __u32 ip_size, + struct tcphdr *tcph) +{ + __u32 thlen = tcph->doff * 4; + + if (tcph->syn && !tcph->ack) { + // packet should only have an MSS option + if (thlen != 24) + return 0; + + if ((void *)tcph + thlen > data_end) + return 0; + + return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen); + } + return 0; +} + static __always_inline void check_syncookie(void *ctx, void *data, void *data_end) { @@ -33,8 +52,10 @@ static __always_inline void check_syncookie(void *ctx, void *data, struct ipv6hdr *ipv6h; struct tcphdr *tcph; int ret; + __u32 key_mss = 2; + __u32 key_gen = 1; __u32 key = 0; - __u64 value = 1; + __s64 seq_mss; ethh = data; if (ethh + 1 > data_end) @@ -66,6 +87,9 @@ static __always_inline void check_syncookie(void *ctx, void *data, if (sk->state != BPF_TCP_LISTEN) goto release; + seq_mss = 
gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h), + tcph); + ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h), tcph, sizeof(*tcph)); break; @@ -95,6 +119,9 @@ static __always_inline void check_syncookie(void *ctx, void *data, if (sk->state != BPF_TCP_LISTEN) goto release; + seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h), + tcph); + ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h), tcph, sizeof(*tcph)); break; @@ -103,8 +130,19 @@ static __always_inline void check_syncookie(void *ctx, void *data, return; } - if (ret == 0) - bpf_map_update_elem(&results, &key, &value, 0); + if (seq_mss > 0) { + __u32 cookie = (__u32)seq_mss; + __u32 mss = seq_mss >> 32; + + bpf_map_update_elem(&results, &key_gen, &cookie, 0); + bpf_map_update_elem(&results, &key_mss, &mss, 0); + } + + if (ret == 0) { + __u32 cookie = bpf_ntohl(tcph->ack_seq) - 1; + + bpf_map_update_elem(&results, &key, &cookie, 0); + } release: bpf_sk_release(sk); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 5443b9bd75ed..e1f1becda529 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -508,6 +508,21 @@ static void test_devmap(unsigned int task, void *data) close(fd); } +static void test_devmap_hash(unsigned int task, void *data) +{ + int fd; + __u32 key, value; + + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create devmap_hash '%s'!\n", strerror(errno)); + exit(1); + } + + close(fd); +} + static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; @@ -1684,6 +1699,7 @@ static void run_all_tests(void) test_arraymap_percpu_many_keys(); test_devmap(0, NULL); + test_devmap_hash(0, NULL); test_sockmap(0, NULL); test_map_large(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index dae0819b1141..12895d03d58b 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -3,10 +3,107 @@ */ #include "test_progs.h" #include "bpf_rlimit.h" +#include <argp.h> +#include <string.h> +/* defined in test_progs.h */ +struct test_env env; int error_cnt, pass_cnt; -bool jit_enabled; -bool verifier_stats = false; + +struct prog_test_def { + const char *test_name; + int test_num; + void (*run_test)(void); + bool force_log; + int pass_cnt; + int error_cnt; + bool tested; + + const char *subtest_name; + int subtest_num; + + /* store counts before subtest started */ + int old_pass_cnt; + int old_error_cnt; +}; + +static bool should_run(struct test_selector *sel, int num, const char *name) +{ + if (sel->name && sel->name[0] && !strstr(name, sel->name)) + return false; + + if (!sel->num_set) + return true; + + return num < sel->num_set_len && sel->num_set[num]; +} + +static void dump_test_log(const struct prog_test_def *test, bool failed) +{ + if (stdout == env.stdout) + return; + + fflush(stdout); /* exports env.log_buf & env.log_cnt */ + + if (env.verbose || test->force_log || failed) { + if (env.log_cnt) { + fprintf(env.stdout, "%s", env.log_buf); + if (env.log_buf[env.log_cnt - 1] != '\n') + fprintf(env.stdout, "\n"); + } + } + + fseeko(stdout, 0, SEEK_SET); /* rewind */ +} + +void test__end_subtest() +{ + struct prog_test_def *test = env.test; + int sub_error_cnt = error_cnt - test->old_error_cnt; + + if (sub_error_cnt) + env.fail_cnt++; + else + env.sub_succ_cnt++; + + dump_test_log(test, sub_error_cnt); + + fprintf(env.stdout, "#%d/%d 
%s:%s\n", + test->test_num, test->subtest_num, + test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); +} + +bool test__start_subtest(const char *name) +{ + struct prog_test_def *test = env.test; + + if (test->subtest_name) { + test__end_subtest(); + test->subtest_name = NULL; + } + + test->subtest_num++; + + if (!name || !name[0]) { + fprintf(env.stderr, + "Subtest #%d didn't provide sub-test name!\n", + test->subtest_num); + return false; + } + + if (!should_run(&env.subtest_selector, test->subtest_num, name)) + return false; + + test->subtest_name = name; + env.test->old_pass_cnt = pass_cnt; + env.test->old_error_cnt = error_cnt; + + return true; +} + +void test__force_log() { + env.test->force_log = true; +} struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), @@ -156,23 +253,276 @@ void *spin_lock_thread(void *arg) pthread_exit(arg); } -#define DECLARE +/* extern declarations for test funcs */ +#define DEFINE_TEST(name) extern void test_##name(); +#include <prog_tests/tests.h> +#undef DEFINE_TEST + +static struct prog_test_def prog_test_defs[] = { +#define DEFINE_TEST(name) { \ + .test_name = #name, \ + .run_test = &test_##name, \ +}, #include <prog_tests/tests.h> -#undef DECLARE +#undef DEFINE_TEST +}; +const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); + +const char *argp_program_version = "test_progs 0.1"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = "BPF selftests test runner"; + +enum ARG_KEYS { + ARG_TEST_NUM = 'n', + ARG_TEST_NAME = 't', + ARG_VERIFIER_STATS = 's', + ARG_VERBOSE = 'v', +}; + +static const struct argp_option opts[] = { + { "num", ARG_TEST_NUM, "NUM", 0, + "Run test number NUM only " }, + { "name", ARG_TEST_NAME, "NAME", 0, + "Run tests with names containing NAME" }, + { "verifier-stats", ARG_VERIFIER_STATS, NULL, 0, + "Output verifier statistics", }, + { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, + "Verbose output (use -vv for extra verbose output)" }, + {}, +}; + +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (!env.very_verbose && level == LIBBPF_DEBUG) + return 0; + vprintf(format, args); + return 0; +} + +int parse_num_list(const char *s, struct test_selector *sel) +{ + int i, set_len = 0, num, start = 0, end = -1; + bool *set = NULL, *tmp, parsing_end = false; + char *next; + + while (s[0]) { + errno = 0; + num = strtol(s, &next, 10); + if (errno) + return -errno; + + if (parsing_end) + end = num; + else + start = num; + + if (!parsing_end && *next == '-') { + s = next + 1; + parsing_end = true; + continue; + } else if (*next == ',') { + parsing_end = false; + s = next + 1; + end = num; + } else if (*next == '\0') { + parsing_end = false; + s = next; + end = num; + } else { + return -EINVAL; + } + + if (start > end) + return -EINVAL; -int main(int ac, char **av) + if (end + 1 > set_len) { + set_len = end + 1; + tmp = realloc(set, set_len); + if (!tmp) { + free(set); + return -ENOMEM; + } + set = tmp; + } + for (i = start; i <= end; i++) { + set[i] = true; + } + + } + + if (!set) + return -EINVAL; + + sel->num_set = set; + sel->num_set_len = set_len; + + return 0; +} + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + struct test_env *env = state->input; + + switch (key) { + case ARG_TEST_NUM: { + char *subtest_str = strchr(arg, '/'); + + if (subtest_str) { + *subtest_str = '\0'; + if (parse_num_list(subtest_str + 1, + &env->subtest_selector)) { + fprintf(stderr, + "Failed to parse subtest 
numbers.\n"); + return -EINVAL; + } + } + if (parse_num_list(arg, &env->test_selector)) { + fprintf(stderr, "Failed to parse test numbers.\n"); + return -EINVAL; + } + break; + } + case ARG_TEST_NAME: { + char *subtest_str = strchr(arg, '/'); + + if (subtest_str) { + *subtest_str = '\0'; + env->subtest_selector.name = strdup(subtest_str + 1); + if (!env->subtest_selector.name) + return -ENOMEM; + } + env->test_selector.name = strdup(arg); + if (!env->test_selector.name) + return -ENOMEM; + break; + } + case ARG_VERIFIER_STATS: + env->verifier_stats = true; + break; + case ARG_VERBOSE: + if (arg) { + if (strcmp(arg, "v") == 0) { + env->very_verbose = true; + } else { + fprintf(stderr, + "Unrecognized verbosity setting ('%s'), only -v and -vv are supported\n", + arg); + return -EINVAL; + } + } + env->verbose = true; + break; + case ARGP_KEY_ARG: + argp_usage(state); + break; + case ARGP_KEY_END: + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static void stdio_hijack(void) { +#ifdef __GLIBC__ + env.stdout = stdout; + env.stderr = stderr; + + if (env.verbose) { + /* nothing to do, output to stdout by default */ + return; + } + + /* stdout and stderr -> buffer */ + fflush(stdout); + + stdout = open_memstream(&env.log_buf, &env.log_cnt); + if (!stdout) { + stdout = env.stdout; + perror("open_memstream"); + return; + } + + stderr = stdout; +#endif +} + +static void stdio_restore(void) +{ +#ifdef __GLIBC__ + if (stdout == env.stdout) + return; + + fclose(stdout); + free(env.log_buf); + + env.log_buf = NULL; + env.log_cnt = 0; + + stdout = env.stdout; + stderr = env.stderr; +#endif +} + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + int err, i; + + err = argp_parse(&argp, argc, argv, 0, NULL, &env); + if (err) + return err; + + libbpf_set_print(libbpf_print_fn); + srand(time(NULL)); - jit_enabled = is_jit_enabled(); + env.jit_enabled = is_jit_enabled(); - if (ac == 2 && strcmp(av[1], "-s") == 0) - verifier_stats = true; + stdio_hijack(); + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + int old_pass_cnt = pass_cnt; + int old_error_cnt = error_cnt; -#define CALL -#include <prog_tests/tests.h> -#undef CALL + env.test = test; + test->test_num = i + 1; + + if (!should_run(&env.test_selector, + test->test_num, test->test_name)) + continue; + + test->run_test(); + /* ensure last sub-test is finalized properly */ + if (test->subtest_name) + test__end_subtest(); + + test->tested = true; + test->pass_cnt = pass_cnt - old_pass_cnt; + test->error_cnt = error_cnt - old_error_cnt; + if (test->error_cnt) + env.fail_cnt++; + else + env.succ_cnt++; + + dump_test_log(test, test->error_cnt); + + fprintf(env.stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + test->error_cnt ? "FAIL" : "OK"); + } + stdio_restore(); + printf("Summary: %d/%d PASSED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.fail_cnt); + + free(env.test_selector.num_set); + free(env.subtest_selector.num_set); - printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 49e0f7d85643..37d427f5a1e5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -38,9 +38,40 @@ typedef __u16 __sum16; #include "trace_helpers.h" #include "flow_dissector_load.h" -extern int error_cnt, pass_cnt; -extern bool jit_enabled; -extern bool verifier_stats; +struct prog_test_def; + +struct test_selector { + const char *name; + bool *num_set; + int num_set_len; +}; + +struct test_env { + struct test_selector test_selector; + struct test_selector subtest_selector; + bool verifier_stats; + bool verbose; + bool very_verbose; + + bool jit_enabled; + + struct prog_test_def *test; + FILE *stdout; + FILE *stderr; + char *log_buf; + size_t log_cnt; + + int succ_cnt; /* successful tests */ + int sub_succ_cnt; /* successful sub-tests */ + int fail_cnt; /* total failed tests + sub-tests */ +}; + +extern int error_cnt; +extern int pass_cnt; +extern struct test_env env; + +extern void test__force_log(); +extern bool test__start_subtest(const char *name); #define MAGIC_BYTES 123 @@ -68,7 +99,8 @@ extern struct ipv6_packet pkt_v6; printf(format); \ } else { \ pass_cnt++; \ - printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\ + printf("%s:PASS:%s %d nsec\n", \ + __func__, tag, duration); \ } \ __ret; \ }) diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c index 036b652e5ca9..e4f6055d92e9 100644 --- a/tools/testing/selftests/bpf/test_sockopt_sk.c +++ b/tools/testing/selftests/bpf/test_sockopt_sk.c @@ -6,6 +6,7 @@ #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <linux/filter.h> #include <bpf/bpf.h> @@ -25,6 +26,7 @@ static int getsetsockopt(void) union { char u8[4]; __u32 u32; + char cc[16]; /* TCP_CA_NAME_MAX */ } buf = {}; socklen_t optlen; @@ -115,6 +117,29 @@ static int getsetsockopt(void) goto err; } + /* TCP_CONGESTION can extend the string */ + + strcpy(buf.cc, "nv"); + err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv")); + if (err) { + log_err("Failed to call setsockopt(TCP_CONGESTION)"); + goto err; + } + + + optlen = sizeof(buf.cc); + err = getsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(TCP_CONGESTION)"); + goto err; + } + + if (strcmp(buf.cc, "cubic") != 0) { + log_err("Unexpected getsockopt(TCP_CONGESTION) %s != %s", + buf.cc, "cubic"); + goto err; + } + close(fd); return 0; err: diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh index d48e51716d19..9b3617d770a5 100755 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -37,6 +37,9 @@ setup() ns1_exec ip link set lo up ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 + ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0 + ns1_exec sysctl -w net.ipv4.tcp_timestamps=0 + ns1_exec sysctl -w net.ipv4.tcp_sack=0 wait_for_ip 127.0.0.1 wait_for_ip ::1 diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 87829c86c746..b9e991d43155 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -2,6 +2,7 @@ // Copyright (c) 2018 Facebook // Copyright (c) 2019 Cloudflare 
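/* A minimal sketch, not part of the patch (unpack_syncookie() is an
 * illustrative name): the checks below rely on bpf_tcp_gen_syncookie()
 * packing its __s64 return value as cookie | ((u64)mss << 32), which is
 * what the BPF side above splits into the key_gen and key_mss result
 * slots. */
#include <linux/types.h>

static inline int unpack_syncookie(__s64 ret, __u32 *cookie, __u32 *mss)
{
	if (ret <= 0)		/* < 0: -errno; 0: no cookie was generated */
		return -1;
	*cookie = (__u32)ret;	/* bits 0..31: the SYN cookie itself */
	*mss = ret >> 32;	/* bits 32..47: the MSS encoded in the cookie */
	return 0;
}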
+#include <limits.h> #include <string.h> #include <stdlib.h> #include <unistd.h> @@ -77,7 +78,7 @@ out: return fd; } -static int get_map_fd_by_prog_id(int prog_id) +static int get_map_fd_by_prog_id(int prog_id, bool *xdp) { struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); @@ -104,6 +105,8 @@ static int get_map_fd_by_prog_id(int prog_id) goto err; } + *xdp = info.type == BPF_PROG_TYPE_XDP; + map_fd = bpf_map_get_fd_by_id(map_ids[0]); if (map_fd < 0) log_err("Failed to get fd by map id %d", map_ids[0]); @@ -113,18 +116,32 @@ err: return map_fd; } -static int run_test(int server_fd, int results_fd) +static int run_test(int server_fd, int results_fd, bool xdp) { int client = -1, srv_client = -1; int ret = 0; __u32 key = 0; - __u64 value = 0; + __u32 key_gen = 1; + __u32 key_mss = 2; + __u32 value = 0; + __u32 value_gen = 0; + __u32 value_mss = 0; if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) { log_err("Can't clear results"); goto err; } + if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) { + log_err("Can't clear results"); + goto err; + } + + if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) { + log_err("Can't clear results"); + goto err; + } + client = connect_to_server(server_fd); if (client == -1) goto err; @@ -140,8 +157,35 @@ static int run_test(int server_fd, int results_fd) goto err; } - if (value != 1) { - log_err("Didn't match syncookie: %llu", value); + if (value == 0) { + log_err("Didn't match syncookie: %u", value); + goto err; + } + + if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) { + log_err("Can't lookup result"); + goto err; + } + + if (xdp && value_gen == 0) { + // SYN packets do not get passed through generic XDP, skip the + // rest of the test. + printf("Skipping XDP cookie check\n"); + goto out; + } + + if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) { + log_err("Can't lookup result"); + goto err; + } + + if (value != value_gen) { + log_err("BPF generated cookie does not match kernel one"); + goto err; + } + + if (value_mss < 536 || value_mss > USHRT_MAX) { + log_err("Unexpected MSS retrieved"); goto err; } @@ -163,13 +207,14 @@ int main(int argc, char **argv) int server_v6 = -1; int results = -1; int err = 0; + bool xdp; if (argc < 2) { fprintf(stderr, "Usage: %s prog_id\n", argv[0]); exit(1); } - results = get_map_fd_by_prog_id(atoi(argv[1])); + results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp); if (results < 0) { log_err("Can't get map"); goto err; @@ -194,10 +239,10 @@ int main(int argc, char **argv) if (server_v6 == -1) goto err; - if (run_test(server, results)) + if (run_test(server, results, xdp)) goto err; - if (run_test(server_v6, results)) + if (run_test(server_v6, results, xdp)) goto err; printf("ok\n"); diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 86152d9ae95b..f9765ddf0761 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -17,6 +17,7 @@ #include <linux/rtnetlink.h> #include <signal.h> #include <linux/perf_event.h> +#include <linux/err.h> #include "bpf_rlimit.h" #include "bpf_util.h" @@ -30,28 +31,34 @@ pthread_t tid; int rx_callbacks; -static int dummyfn(void *data, int size) +static void dummyfn(void *ctx, int cpu, void *data, __u32 size) { struct tcp_notifier *t = data; if (t->type != 0xde || t->subtype != 0xad || t->source != 0xbe || t->hash != 0xef) - return 1; + return; rx_callbacks++; - return 0; } -void 
tcp_notifier_poller(int fd) +void tcp_notifier_poller(struct perf_buffer *pb) { - while (1) - perf_event_poller(fd, dummyfn); + int err; + + while (1) { + err = perf_buffer__poll(pb, 100); + if (err < 0 && err != -EINTR) { + printf("failed perf_buffer__poll: %d\n", err); + return; + } + } } static void *poller_thread(void *arg) { - int fd = *(int *)arg; + struct perf_buffer *pb = arg; - tcp_notifier_poller(fd); + tcp_notifier_poller(pb); return arg; } @@ -60,52 +67,20 @@ int verify_result(const struct tcpnotify_globals *result) return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1); } -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -static int setup_bpf_perf_event(int mapfd) -{ - struct perf_event_attr attr = { - .sample_type = PERF_SAMPLE_RAW, - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - }; - int key = 0; - int pmu_fd; - - pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); - if (pmu_fd < 0) - return pmu_fd; - bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY); - - ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - return pmu_fd; -} - int main(int argc, char **argv) { const char *file = "test_tcpnotify_kern.o"; - int prog_fd, map_fd, perf_event_fd; + struct bpf_map *perf_map, *global_map; + struct perf_buffer_opts pb_opts = {}; struct tcpnotify_globals g = {0}; + struct perf_buffer *pb = NULL; const char *cg_path = "/foo"; + int prog_fd, rv, cg_fd = -1; int error = EXIT_FAILURE; struct bpf_object *obj; - int cg_fd = -1; - __u32 key = 0; - int rv; char test_script[80]; - int pmu_fd; cpu_set_t cpuset; + __u32 key = 0; CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); @@ -133,19 +108,24 @@ int main(int argc, char **argv) goto err; } - perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); - if (perf_event_fd < 0) + perf_map = bpf_object__find_map_by_name(obj, "perf_event_map"); + if (!perf_map) { + printf("FAIL:map '%s' not found\n", "perf_event_map"); goto err; + } - map_fd = bpf_find_map(__func__, obj, "global_map"); - if (map_fd < 0) - goto err; + global_map = bpf_object__find_map_by_name(obj, "global_map"); + if (!global_map) { + printf("FAIL:map '%s' not found\n", "global_map"); + goto err; + } - pmu_fd = setup_bpf_perf_event(perf_event_fd); - if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) + pb_opts.sample_cb = dummyfn; + pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); + if (IS_ERR(pb)) goto err; - pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); + pthread_create(&tid, NULL, poller_thread, pb); sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", @@ -162,7 +142,7 @@ int main(int argc, char **argv) TESTPORT); system(test_script); - rv = bpf_map_lookup_elem(map_fd, &key, &g); + rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); goto err; @@ -182,5 +162,7 @@ err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); close(cg_fd); cleanup_cgroup_environment(); + if (!IS_ERR_OR_NULL(pb)) + perf_buffer__free(pb); return error; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 84135d5f4b35..44e2d640b088 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS
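/* For reference, a minimal consumer of the perf_buffer API that the
 * test_tcpnotify_user.c conversion above switches to; handle_sample and
 * consume_events are illustrative names, while perf_buffer__new(),
 * perf_buffer__poll() and perf_buffer__free() are the libbpf calls the
 * test now uses (this libbpf returns ERR_PTR-style pointers on failure). */
#include <linux/err.h>
#include <linux/types.h>
#include <bpf/libbpf.h>

static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* called once per PERF_RECORD_SAMPLE that a BPF program emitted
	 * via bpf_perf_event_output(); data/size describe the payload */
}

static int consume_events(int perf_map_fd)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = handle_sample };
	struct perf_buffer *pb;
	int err = 0;

	pb = perf_buffer__new(perf_map_fd, 8 /* pages per CPU ring */, &pb_opts);
	if (IS_ERR(pb))
		return PTR_ERR(pb);

	while (err >= 0 || err == -EINTR)	/* poll until a hard error */
		err = perf_buffer__poll(pb, 100 /* timeout in ms */);

	perf_buffer__free(pb);
	return err;
}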
#define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 18 +#define MAX_NR_MAPS 19 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4a11 #define TEST_DATA_LEN 64 @@ -84,6 +84,7 @@ struct bpf_test { int fixup_map_array_wo[MAX_FIXUPS]; int fixup_map_array_small[MAX_FIXUPS]; int fixup_sk_storage_map[MAX_FIXUPS]; + int fixup_map_event_output[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -632,6 +633,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_array_wo = test->fixup_map_array_wo; int *fixup_map_array_small = test->fixup_map_array_small; int *fixup_sk_storage_map = test->fixup_sk_storage_map; + int *fixup_map_event_output = test->fixup_map_event_output; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -793,6 +795,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_sk_storage_map++; } while (*fixup_sk_storage_map); } + if (*fixup_map_event_output) { + map_fds[18] = __create_map(BPF_MAP_TYPE_PERF_EVENT_ARRAY, + sizeof(int), sizeof(int), 1, 0); + do { + prog[*fixup_map_event_output].imm = map_fds[18]; + fixup_map_event_output++; + } while (*fixup_map_event_output); + } } static int set_admin(bool admin) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index b47f205f0310..7f989b3e4e22 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -86,128 +86,3 @@ long ksym_get_addr(const char *name) return 0; } - -static int page_size; -static int page_cnt = 8; -static struct perf_event_mmap_page *header; - -int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header) -{ - void *base; - int mmap_size; - - page_size = getpagesize(); - mmap_size = page_size * (page_cnt + 1); - - base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (base == MAP_FAILED) { - printf("mmap err\n"); - return -1; - } - - *header = base; - return 0; -} - -int perf_event_mmap(int fd) -{ - return perf_event_mmap_header(fd, &header); -} - -static int perf_event_poll(int fd) -{ - struct pollfd pfd = { .fd = fd, .events = POLLIN }; - - return poll(&pfd, 1, 1000); -} - -struct perf_event_sample { - struct perf_event_header header; - __u32 size; - char data[]; -}; - -static enum bpf_perf_event_ret -bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) -{ - struct perf_event_sample *e = (struct perf_event_sample *)hdr; - perf_event_print_fn fn = private_data; - int ret; - - if (e->header.type == PERF_RECORD_SAMPLE) { - ret = fn(e->data, e->size); - if (ret != LIBBPF_PERF_EVENT_CONT) - return ret; - } else if (e->header.type == PERF_RECORD_LOST) { - struct { - struct perf_event_header header; - __u64 id; - __u64 lost; - } *lost = (void *) e; - printf("lost %lld events\n", lost->lost); - } else { - printf("unknown event type=%d size=%d\n", - e->header.type, e->header.size); - } - - return LIBBPF_PERF_EVENT_CONT; -} - -int perf_event_poller(int fd, perf_event_print_fn output_fn) -{ - enum bpf_perf_event_ret ret; - void *buf = NULL; - size_t len = 0; - - for (;;) { - perf_event_poll(fd); - ret = bpf_perf_event_read_simple(header, page_cnt * page_size, - page_size, &buf, &len, - bpf_perf_event_print, - output_fn); - if (ret != LIBBPF_PERF_EVENT_CONT) - break; - } - free(buf); - - return ret; -} - -int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers, - int num_fds,
perf_event_print_fn output_fn) -{ - enum bpf_perf_event_ret ret; - struct pollfd *pfds; - void *buf = NULL; - size_t len = 0; - int i; - - pfds = calloc(num_fds, sizeof(*pfds)); - if (!pfds) - return LIBBPF_PERF_EVENT_ERROR; - - for (i = 0; i < num_fds; i++) { - pfds[i].fd = fds[i]; - pfds[i].events = POLLIN; - } - - for (;;) { - poll(pfds, num_fds, 1000); - for (i = 0; i < num_fds; i++) { - if (!pfds[i].revents) - continue; - - ret = bpf_perf_event_read_simple(headers[i], - page_cnt * page_size, - page_size, &buf, &len, - bpf_perf_event_print, - output_fn); - if (ret != LIBBPF_PERF_EVENT_CONT) - break; - } - } - free(buf); - free(pfds); - - return ret; -} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 18924f23db1b..aa4dcfe18050 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -3,7 +3,6 @@ #define __TRACE_HELPER_H #include <libbpf.h> -#include <linux/perf_event.h> struct ksym { long addr; @@ -14,12 +13,4 @@ int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); -typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size); - -int perf_event_mmap(int fd); -int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header); -/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */ -int perf_event_poller(int fd, perf_event_print_fn output_fn); -int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers, - int num_fds, perf_event_print_fn output_fn); #endif diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c new file mode 100644 index 000000000000..130553e19eca --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/event_output.c @@ -0,0 +1,94 @@ +/* instructions used to output an skb-based software event, produced + * from this code snippet: + * struct TMP { + * uint64_t tmp; + * } tt; + * tt.tmp = 5; + * bpf_perf_event_output(skb, &connection_tracking_event_map, 0, + * &tt, sizeof(tt)); + * return 1; + * + * the bpf assembly from llvm is: + * 0: b7 02 00 00 05 00 00 00 r2 = 5 + * 1: 7b 2a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r2 + * 2: bf a4 00 00 00 00 00 00 r4 = r10 + * 3: 07 04 00 00 f8 ff ff ff r4 += -8 + * 4: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0ll + * 6: b7 03 00 00 00 00 00 00 r3 = 0 + * 7: b7 05 00 00 08 00 00 00 r5 = 8 + * 8: 85 00 00 00 19 00 00 00 call 25 + * 9: b7 00 00 00 01 00 00 00 r0 = 1 + * 10: 95 00 00 00 00 00 00 00 exit + * + * The reason this code lives here instead of a fill_helpers callback is + * that the map fixup is applied against the insns, not the filled prog.
+ */ + +#define __PERF_EVENT_INSNS__ \ + BPF_MOV64_IMM(BPF_REG_2, 5), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), \ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), \ + BPF_LD_MAP_FD(BPF_REG_2, 0), \ + BPF_MOV64_IMM(BPF_REG_3, 0), \ + BPF_MOV64_IMM(BPF_REG_5, 8), \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, \ + BPF_FUNC_perf_event_output), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), +{ + "perfevent for sockops", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for tc", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for lwt out", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_LWT_OUT, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for xdp", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for socket filter", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for sk_skb", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for cgroup skb", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +},
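One detail worth spelling out about the fixup index used in every entry above: BPF_LD_MAP_FD() expands to a two-slot ld_imm64 instruction, which is why the llvm dump shows "r2 = 0ll" at pc 4 with the next instruction at pc 6, and why .fixup_map_event_output = { 4 } points at the first of those two slots. A minimal sketch of the patching step that do_test_fixup() performs (prog and map_fd stand in for the test-local variables):

	int map_fd = bpf_create_map(BPF_MAP_TYPE_PERF_EVENT_ARRAY,
				    sizeof(int), sizeof(int), 1, 0);

	/* BPF_LD_MAP_FD() already set src_reg = BPF_PSEUDO_MAP_FD, so writing
	 * the fd into the first slot's imm is all the fixup needs to do; the
	 * verifier resolves it into a real map pointer at program load time. */
	prog[4].imm = map_fd;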