summaryrefslogtreecommitdiff
path: root/include/uapi/linux/bpf.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 16:43:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 16:43:06 -0700
commit9e9fb7655ed585da8f468e29221f0ba194a5f613 (patch)
treed2c51887389b8297635a5b90d5766897f00fe928 /include/uapi/linux/bpf.h
parent86ac54e79fe09b34c52691a780a6e31d12fa57f4 (diff)
parent29ce8f9701072fc221d9c38ad952de1a9578f95c (diff)
Merge tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - Enable memcg accounting for various networking objects. BPF: - Introduce bpf timers. - Add perf link and opaque bpf_cookie which the program can read out again, to be used in libbpf-based USDT library. - Add bpf_task_pt_regs() helper to access user space pt_regs in kprobes, to help user space stack unwinding. - Add support for UNIX sockets for BPF sockmap. - Extend BPF iterator support for UNIX domain sockets. - Allow BPF TCP congestion control progs and bpf iterators to call bpf_setsockopt(), e.g. to switch to another congestion control algorithm. Protocols: - Support IOAM Pre-allocated Trace with IPv6. - Support Management Component Transport Protocol. - bridge: multicast: add vlan support. - netfilter: add hooks for the SRv6 lightweight tunnel driver. - tcp: - enable mid-stream window clamping (by user space or BPF) - allow data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD - more accurate DSACK processing for RACK-TLP - mptcp: - add full mesh path manager option - add partial support for MP_FAIL - improve use of backup subflows - optimize option processing - af_unix: add OOB notification support. - ipv6: add IFLA_INET6_RA_MTU to expose MTU value advertised by the router. - mac80211: Target Wake Time support in AP mode. - can: j1939: extend UAPI to notify about RX status. Driver APIs: - Add page frag support in page pool API. - Many improvements to the DSA (distributed switch) APIs. - ethtool: extend IRQ coalesce uAPI with timer reset modes. - devlink: control which auxiliary devices are created. - Support CAN PHYs via the generic PHY subsystem. - Proper cross-chip support for tag_8021q. - Allow TX forwarding for the software bridge data path to be offloaded to capable devices. Drivers: - veth: more flexible channels number configuration. - openvswitch: introduce per-cpu upcall dispatch. - Add internet mix (IMIX) mode to pktgen. - Transparently handle XDP operations in the bonding driver. - Add LiteETH network driver. - Renesas (ravb): - support Gigabit Ethernet IP - NXP Ethernet switch (sja1105): - fast aging support - support for "H" switch topologies - traffic termination for ports under VLAN-aware bridge - Intel 1G Ethernet - support getcrosststamp() with PCIe PTM (Precision Time Measurement) for better time sync - support Credit-Based Shaper (CBS) offload, enabling HW traffic prioritization and bandwidth reservation - Broadcom Ethernet (bnxt) - support pulse-per-second output - support larger Rx rings - Mellanox Ethernet (mlx5) - support ethtool RSS contexts and MQPRIO channel mode - support LAG offload with bridging - support devlink rate limit API - support packet sampling on tunnels - Huawei Ethernet (hns3): - basic devlink support - add extended IRQ coalescing support - report extended link state - Netronome Ethernet (nfp): - add conntrack offload support - Broadcom WiFi (brcmfmac): - add WPA3 Personal with FT to supported cipher suites - support 43752 SDIO device - Intel WiFi (iwlwifi): - support scanning hidden 6GHz networks - support for a new hardware family (Bz) - Xen pv driver: - harden netfront against malicious backends - Qualcomm mobile - ipa: refactor power management and enable automatic suspend - mhi: move MBIM to WWAN subsystem interfaces Refactor: - Ambient BPF run context and cgroup storage cleanup. - Compat rework for ndo_ioctl. Old code removal: - prism54 remove the obsoleted driver, deprecated by the p54 driver. - wan: remove sbni/granch driver" * tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1715 commits) net: Add depends on OF_NET for LiteX's LiteETH ipv6: seg6: remove duplicated include net: hns3: remove unnecessary spaces net: hns3: add some required spaces net: hns3: clean up a type mismatch warning net: hns3: refine function hns3_set_default_feature() ipv6: remove duplicated 'net/lwtunnel.h' include net: w5100: check return value after calling platform_get_resource() net/mlxbf_gige: Make use of devm_platform_ioremap_resourcexxx() net: mdio: mscc-miim: Make use of the helper function devm_platform_ioremap_resource() net: mdio-ipq4019: Make use of devm_platform_ioremap_resource() fou: remove sparse errors ipv4: fix endianness issue in inet_rtm_getroute_build_skb() octeontx2-af: Set proper errorcode for IPv4 checksum errors octeontx2-af: Fix static code analyzer reported issues octeontx2-af: Fix mailbox errors in nix_rss_flowkey_cfg octeontx2-af: Fix loop in free and unmap counter af_unix: fix potential NULL deref in unix_dgram_connect() dpaa2-eth: Replace strlcpy with strscpy octeontx2-af: Use NDC TX for transmit packet data ...
Diffstat (limited to 'include/uapi/linux/bpf.h')
-rw-r--r--include/uapi/linux/bpf.h119
1 files changed, 114 insertions, 5 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bf9252c7381e..791f31dd0abe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
- __u32 attach_type; /* program attach type */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
union bpf_iter_link_info {
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
* **BPF_PROG_TYPE_SK_LOOKUP**
* *data_in* and *data_out* must be NULL.
*
- * **BPF_PROG_TYPE_XDP**
- * *ctx_in* and *ctx_out* must be NULL.
- *
* **BPF_PROG_TYPE_RAW_TRACEPOINT**,
* **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
*
@@ -996,6 +993,7 @@ enum bpf_attach_type {
BPF_SK_SKB_VERDICT,
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+ BPF_PERF_EVENT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1009,6 +1007,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
+ BPF_LINK_TYPE_PERF_EVENT = 7,
MAX_BPF_LINK_TYPE,
};
@@ -1449,6 +1448,13 @@ union bpf_attr {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
+ struct {
+ /* black box user-provided value passed through
+ * to BPF program at the execution time and
+ * accessible through bpf_get_attach_cookie() BPF helper
+ */
+ __u64 bpf_cookie;
+ } perf_event;
};
} link_create;
@@ -3249,7 +3255,7 @@ union bpf_attr {
* long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
* It checks the selected socket is matching the incoming
* request in the socket buffer.
* Return
@@ -4780,6 +4786,97 @@ union bpf_attr {
* Execute close syscall for given FD.
* Return
* A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ * Description
+ * Initialize the timer.
+ * First 4 bits of *flags* specify clockid.
+ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ * All other bits of *flags* are reserved.
+ * The verifier will reject the program if *timer* is not from
+ * the same *map*.
+ * Return
+ * 0 on success.
+ * **-EBUSY** if *timer* is already initialized.
+ * **-EINVAL** if invalid *flags* are passed.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ * Description
+ * Configure the timer to call *callback_fn* static function.
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ * Description
+ * Set timer expiration N nanoseconds from the current time. The
+ * configured callback will be invoked in soft irq context on some cpu
+ * and will not repeat unless another bpf_timer_start() is made.
+ * In such case the next invocation can migrate to a different cpu.
+ * Since struct bpf_timer is a field inside map element the map
+ * owns the timer. The bpf_timer_set_callback() will increment refcnt
+ * of BPF program to make sure that callback_fn code stays valid.
+ * When user space reference to a map reaches zero all timers
+ * in a map are cancelled and corresponding program's refcnts are
+ * decremented. This is done to make sure that Ctrl-C of a user
+ * process doesn't leave any timers running. If map is pinned in
+ * bpffs the callback_fn can re-arm itself indefinitely.
+ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ * cancel and free the timer in the given map element.
+ * The map can contain timers that invoke callback_fn-s from different
+ * programs. The same callback_fn can serve different timers from
+ * different maps if key/value layout matches across maps.
+ * Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ * or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ * Description
+ * Cancel the timer and wait for callback_fn to finish if it was running.
+ * Return
+ * 0 if the timer was not active.
+ * 1 if the timer was active.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ * own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ * Description
+ * Get address of the traced function (for tracing and kprobe programs).
+ * Return
+ * Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ * Description
+ * Get bpf_cookie value provided (optionally) during the program
+ * attachment. It might be different for each individual
+ * attachment, even if BPF program itself is the same.
+ * Expects BPF program context *ctx* as a first argument.
+ *
+ * Supported for the following program types:
+ * - kprobe/uprobe;
+ * - tracepoint;
+ * - perf_event.
+ * Return
+ * Value specified by user at BPF link creation/attachment time
+ * or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ * Description
+ * Get the struct pt_regs associated with **task**.
+ * Return
+ * A pointer to struct pt_regs.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4951,6 +5048,13 @@ union bpf_attr {
FN(sys_bpf), \
FN(btf_find_by_name_kind), \
FN(sys_close), \
+ FN(timer_init), \
+ FN(timer_set_callback), \
+ FN(timer_start), \
+ FN(timer_cancel), \
+ FN(get_func_ip), \
+ FN(get_attach_cookie), \
+ FN(task_pt_regs), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6077,6 +6181,11 @@ struct bpf_spin_lock {
__u32 val;
};
+struct bpf_timer {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.