From e542f2c4cd16d49392abf3349341d58153d3c603 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Dec 2021 11:59:03 -0800 Subject: libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF The need to increase RLIMIT_MEMLOCK to do anything useful with BPF is one of the first extremely frustrating gotchas that all new BPF users go through and in some cases have to learn it a very hard way. Luckily, starting with upstream Linux kernel version 5.11, BPF subsystem dropped the dependency on memlock and uses memcg-based memory accounting instead. Unfortunately, detecting memcg-based BPF memory accounting is far from trivial (as can be evidenced by this patch), so in practice most BPF applications still do unconditional RLIMIT_MEMLOCK increase. As we move towards libbpf 1.0, it would be good to allow users to forget about RLIMIT_MEMLOCK vs memcg and let libbpf do the sensible adjustment automatically. This patch paves the way forward in this matter. Libbpf will do feature detection of memcg-based accounting, and if detected, will do nothing. But if the kernel is too old, just like BCC, libbpf will automatically increase RLIMIT_MEMLOCK on behalf of user application ([0]). As this is technically a breaking change, during the transition period applications have to opt into libbpf 1.0 mode by setting LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK bit when calling libbpf_set_strict_mode(). Libbpf allows to control the exact amount of set RLIMIT_MEMLOCK limit with libbpf_set_memlock_rlim_max() API. Passing 0 will make libbpf do nothing with RLIMIT_MEMLOCK. libbpf_set_memlock_rlim_max() has to be called before the first bpf_prog_load(), bpf_btf_load(), or bpf_object__load() call, otherwise it has no effect and will return -EBUSY. [0] Closes: https://github.com/libbpf/libbpf/issues/369 Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211214195904.1785155-2-andrii@kernel.org --- tools/lib/bpf/bpf.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'tools/lib/bpf/bpf.c') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1f84d706eb3e..9b64eed2b003 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,7 +28,9 @@ #include #include #include +#include #include +#include #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" @@ -94,6 +96,77 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int return fd; } +/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to + * memcg-based memory accounting for BPF maps and progs. This was done in [0]. + * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in + * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. + * + * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ + * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") + */ +int probe_memcg_account(void) +{ + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + struct bpf_insn insns[] = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); + union bpf_attr attr; + int prog_fd; + + /* attempt loading freplace trying to use custom BTF */ + memset(&attr, 0, prog_load_attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insn_cnt; + attr.license = ptr_to_u64("GPL"); + + prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); + if (prog_fd >= 0) { + close(prog_fd); + return 1; + } + return 0; +} + +static bool memlock_bumped; +static rlim_t memlock_rlim = RLIM_INFINITY; + +int libbpf_set_memlock_rlim(size_t memlock_bytes) +{ + if (memlock_bumped) + return libbpf_err(-EBUSY); + + memlock_rlim = memlock_bytes; + return 0; +} + +int bump_rlimit_memlock(void) +{ + struct rlimit rlim; + + /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ + if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) + return 0; + + /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ + if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + return 0; + + memlock_bumped = true; + + /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ + if (memlock_rlim == 0) + return 0; + + rlim.rlim_cur = rlim.rlim_max = memlock_rlim; + if (setrlimit(RLIMIT_MEMLOCK, &rlim)) + return -errno; + + return 0; +} + int bpf_map_create(enum bpf_map_type map_type, const char *map_name, __u32 key_size, @@ -105,6 +178,8 @@ int bpf_map_create(enum bpf_map_type map_type, union bpf_attr attr; int fd; + bump_rlimit_memlock(); + memset(&attr, 0, attr_sz); if (!OPTS_VALID(opts, bpf_map_create_opts)) @@ -251,6 +326,8 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, union bpf_attr attr; char *log_buf; + bump_rlimit_memlock(); + if (!OPTS_VALID(opts, bpf_prog_load_opts)) return libbpf_err(-EINVAL); @@ -456,6 +533,8 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, union bpf_attr attr; int fd; + bump_rlimit_memlock(); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; @@ -1056,6 +1135,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa __u32 log_level; int fd; + bump_rlimit_memlock(); + memset(&attr, 0, attr_sz); if (!OPTS_VALID(opts, bpf_btf_load_opts)) -- cgit v1.2.3-58-ga151