diff options
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/Makefile | 8 | ||||
-rw-r--r-- | kernel/bpf/bpf_lsm.c | 1 | ||||
-rw-r--r-- | kernel/bpf/bpf_struct_ops.c | 2 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 189 | ||||
-rw-r--r-- | kernel/bpf/core.c | 8 | ||||
-rw-r--r-- | kernel/bpf/crypto.c | 42 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 27 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 45 | ||||
-rw-r--r-- | kernel/bpf/log.c | 6 | ||||
-rw-r--r-- | kernel/bpf/task_iter.c | 9 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 140 |
11 files changed, 339 insertions, 138 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7eb9ad3a3ae6..0291eef9ce92 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -50,5 +50,11 @@ endif obj-$(CONFIG_BPF_PRELOAD) += preload/ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o -$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE +obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o +obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o + +# Some source files are common to libbpf. +vpath %.c $(srctree)/kernel/bpf:$(srctree)/tools/lib/bpf + +$(obj)/%.o: %.c FORCE $(call if_changed_rule,cc_o_c) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 68240c3c6e7d..08a338e1f231 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -280,6 +280,7 @@ BTF_ID(func, bpf_lsm_cred_prepare) BTF_ID(func, bpf_lsm_file_ioctl) BTF_ID(func, bpf_lsm_file_lock) BTF_ID(func, bpf_lsm_file_open) +BTF_ID(func, bpf_lsm_file_post_open) BTF_ID(func, bpf_lsm_file_receive) BTF_ID(func, bpf_lsm_inode_create) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index a2cf31b14be4..0d515ec57aa5 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -573,7 +573,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, } size = arch_prepare_bpf_trampoline(NULL, image + image_off, - image + PAGE_SIZE, + image + image_off + size, model, flags, tlinks, stub_func); if (size <= 0) { if (image != *_image) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7928d920056f..4ff11779699e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -274,6 +274,7 @@ struct btf { u32 start_str_off; /* first string offset (0 for base BTF) */ char name[MODULE_NAME_LEN]; bool kernel_btf; + __u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */ }; enum verifier_phase { @@ -530,6 +531,11 @@ static bool btf_type_is_decl_tag_target(const struct btf_type *t) btf_type_is_var(t) || btf_type_is_typedef(t); } +bool btf_is_vmlinux(const struct btf *btf) +{ + return btf->kernel_btf && !btf->base_btf; +} + u32 btf_nr_types(const struct btf *btf) { u32 total = 0; @@ -772,7 +778,7 @@ static bool __btf_name_char_ok(char c, bool first) return true; } -static const char *btf_str_by_offset(const struct btf *btf, u32 offset) +const char *btf_str_by_offset(const struct btf *btf, u32 offset) { while (offset < btf->start_str_off) btf = btf->base_btf; @@ -1670,14 +1676,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf) if (!tab) return; - /* For module BTF, we directly assign the sets being registered, so - * there is nothing to free except kfunc_set_tab. - */ - if (btf_is_module(btf)) - goto free_tab; for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) kfree(tab->sets[hook]); -free_tab: kfree(tab); btf->kfunc_set_tab = NULL; } @@ -1735,7 +1735,12 @@ static void btf_free(struct btf *btf) kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); - kvfree(btf->data); + /* vmlinux does not allocate btf->data, it simply points it at + * __start_BTF. + */ + if (!btf_is_vmlinux(btf)) + kvfree(btf->data); + kvfree(btf->base_id_map); kfree(btf); } @@ -1764,6 +1769,23 @@ void btf_put(struct btf *btf) } } +struct btf *btf_base_btf(const struct btf *btf) +{ + return btf->base_btf; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return &btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf_nr_types(base_btf); + btf->start_str_off = base_btf->hdr.str_len; +} + static int env_resolve_init(struct btf_verifier_env *env) { struct btf *btf = env->btf; @@ -5820,6 +5842,15 @@ static int find_kern_ctx_type_id(enum bpf_prog_type prog_type) return ctx_type->type; } +bool btf_is_projection_of(const char *pname, const char *tname) +{ + if (strcmp(pname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) + return true; + if (strcmp(pname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + return true; + return false; +} + bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) @@ -5882,9 +5913,7 @@ again: * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used } */ - if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) - return true; - if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + if (btf_is_projection_of(ctx_tname, tname)) return true; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to @@ -6076,23 +6105,15 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert) -struct btf *btf_parse_vmlinux(void) +static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name, + void *data, unsigned int data_size) { - struct btf_verifier_env *env = NULL; - struct bpf_verifier_log *log; struct btf *btf = NULL; int err; if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) return ERR_PTR(-ENOENT); - env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); - if (!env) - return ERR_PTR(-ENOMEM); - - log = &env->log; - log->level = BPF_LOG_KERNEL; - btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; @@ -6100,10 +6121,10 @@ struct btf *btf_parse_vmlinux(void) } env->btf = btf; - btf->data = __start_BTF; - btf->data_size = __stop_BTF - __start_BTF; + btf->data = data; + btf->data_size = data_size; btf->kernel_btf = true; - snprintf(btf->name, sizeof(btf->name), "vmlinux"); + snprintf(btf->name, sizeof(btf->name), "%s", name); err = btf_parse_hdr(env); if (err) @@ -6123,20 +6144,11 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; - /* btf_parse_vmlinux() runs under bpf_verifier_lock */ - bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); - refcount_set(&btf->refcnt, 1); - err = btf_alloc_id(btf); - if (err) - goto errout; - - btf_verifier_env_free(env); return btf; errout: - btf_verifier_env_free(env); if (btf) { kvfree(btf->types); kfree(btf); @@ -6144,19 +6156,61 @@ errout: return ERR_PTR(err); } +struct btf *btf_parse_vmlinux(void) +{ + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + struct btf *btf; + int err; + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + log->level = BPF_LOG_KERNEL; + btf = btf_parse_base(env, "vmlinux", __start_BTF, __stop_BTF - __start_BTF); + if (IS_ERR(btf)) + goto err_out; + + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ + bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); + err = btf_alloc_id(btf); + if (err) { + btf_free(btf); + btf = ERR_PTR(err); + } +err_out: + btf_verifier_env_free(env); + return btf; +} + +/* If .BTF_ids section was created with distilled base BTF, both base and + * split BTF ids will need to be mapped to actual base/split ids for + * BTF now that it has been relocated. + */ +static __u32 btf_relocate_id(const struct btf *btf, __u32 id) +{ + if (!btf->base_btf || !btf->base_id_map) + return id; + return btf->base_id_map[id]; +} + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES -static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size) +static struct btf *btf_parse_module(const char *module_name, const void *data, + unsigned int data_size, void *base_data, + unsigned int base_data_size) { + struct btf *btf = NULL, *vmlinux_btf, *base_btf = NULL; struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; - struct btf *btf = NULL, *base_btf; - int err; + int err = 0; - base_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(base_btf)) - return base_btf; - if (!base_btf) + vmlinux_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(vmlinux_btf)) + return vmlinux_btf; + if (!vmlinux_btf) return ERR_PTR(-EINVAL); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); @@ -6166,6 +6220,16 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u log = &env->log; log->level = BPF_LOG_KERNEL; + if (base_data) { + base_btf = btf_parse_base(env, ".BTF.base", base_data, base_data_size); + if (IS_ERR(base_btf)) { + err = PTR_ERR(base_btf); + goto errout; + } + } else { + base_btf = vmlinux_btf; + } + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; @@ -6205,12 +6269,22 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u if (err) goto errout; + if (base_btf != vmlinux_btf) { + err = btf_relocate(btf, vmlinux_btf, &btf->base_id_map); + if (err) + goto errout; + btf_free(base_btf); + base_btf = vmlinux_btf; + } + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; errout: btf_verifier_env_free(env); + if (base_btf != vmlinux_btf) + btf_free(base_btf); if (btf) { kvfree(btf->data); kvfree(btf->types); @@ -7763,7 +7837,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, err = -ENOMEM; goto out; } - btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size); + btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size, + mod->btf_base_data, mod->btf_base_data_size); if (IS_ERR(btf)) { kfree(btf_mod); if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) { @@ -8087,7 +8162,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, bool add_filter = !!kset->filter; struct btf_kfunc_set_tab *tab; struct btf_id_set8 *set; - u32 set_cnt; + u32 set_cnt, i; int ret; if (hook >= BTF_KFUNC_HOOK_MAX) { @@ -8133,21 +8208,15 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, goto end; } - /* We don't need to allocate, concatenate, and sort module sets, because - * only one is allowed per hook. Hence, we can directly assign the - * pointer and return. - */ - if (!vmlinux_set) { - tab->sets[hook] = add_set; - goto do_add_filter; - } - /* In case of vmlinux sets, there may be more than one set being * registered per hook. To create a unified set, we allocate a new set * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary * searching the set using btf_id_set8_contains function work. + * + * For module sets, we need to allocate as we may need to relocate + * BTF ids. */ set_cnt = set ? set->cnt : 0; @@ -8177,11 +8246,14 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, /* Concatenate the two sets */ memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); + /* Now that the set is copied, update with relocated BTF ids */ + for (i = set->cnt; i < set->cnt + add_set->cnt; i++) + set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id); + set->cnt += add_set->cnt; sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); -do_add_filter: if (add_filter) { hook_filter = &tab->hook_filters[hook]; hook_filter->filters[hook_filter->nr_filters++] = kset->filter; @@ -8301,7 +8373,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, return PTR_ERR(btf); for (i = 0; i < kset->set->cnt; i++) { - ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id, + ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id), kset->set->pairs[i].flags); if (ret) goto err_out; @@ -8365,7 +8437,7 @@ static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc u32 nr_args, i; for (i = 0; i < cnt; i++) { - dtor_btf_id = dtors[i].kfunc_btf_id; + dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id); dtor_func = btf_type_by_id(btf, dtor_btf_id); if (!dtor_func || !btf_type_is_func(dtor_func)) @@ -8400,7 +8472,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c { struct btf_id_dtor_kfunc_tab *tab; struct btf *btf; - u32 tab_cnt; + u32 tab_cnt, i; int ret; btf = btf_get_module_btf(owner); @@ -8451,6 +8523,13 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c btf->dtor_kfunc_tab = tab; memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); + + /* remap BTF ids based on BTF relocation (if any) */ + for (i = tab_cnt; i < tab_cnt + add_cnt; i++) { + tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id); + tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id); + } + tab->cnt += add_cnt; sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 695a0fb2cd4d..7ee62e38faf0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1173,8 +1173,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, } /* Copy JITed text from rw_header to its final location, the ro_header. */ -int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, - struct bpf_binary_header *ro_header, +int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header) { void *ptr; @@ -2742,8 +2741,7 @@ static void bpf_free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } -void __bpf_free_used_btfs(struct bpf_prog_aux *aux, - struct btf_mod_pair *used_btfs, u32 len) +void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len) { #ifdef CONFIG_BPF_SYSCALL struct btf_mod_pair *btf_mod; @@ -2760,7 +2758,7 @@ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, static void bpf_free_used_btfs(struct bpf_prog_aux *aux) { - __bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt); + __bpf_free_used_btfs(aux->used_btfs, aux->used_btf_cnt); kfree(aux->used_btfs); } diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c index 2bee4af91e38..94854cd9c4cc 100644 --- a/kernel/bpf/crypto.c +++ b/kernel/bpf/crypto.c @@ -275,7 +275,7 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, if (__bpf_dynptr_is_rdonly(dst)) return -EINVAL; - siv_len = __bpf_dynptr_size(siv); + siv_len = siv ? __bpf_dynptr_size(siv) : 0; src_len = __bpf_dynptr_size(src); dst_len = __bpf_dynptr_size(dst); if (!src_len || !dst_len) @@ -303,36 +303,44 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, /** * bpf_crypto_decrypt() - Decrypt buffer using configured context and IV provided. - * @ctx: The crypto context being used. The ctx must be a trusted pointer. - * @src: bpf_dynptr to the encrypted data. Must be a trusted pointer. - * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer. - * @siv: bpf_dynptr to IV data and state data to be used by decryptor. + * @ctx: The crypto context being used. The ctx must be a trusted pointer. + * @src: bpf_dynptr to the encrypted data. Must be a trusted pointer. + * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer. + * @siv__nullable: bpf_dynptr to IV data and state data to be used by decryptor. May be NULL. * * Decrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv__nullable) { - return bpf_crypto_crypt(ctx, src, dst, siv, true); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv__nullable; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, true); } /** * bpf_crypto_encrypt() - Encrypt buffer using configured context and IV provided. - * @ctx: The crypto context being used. The ctx must be a trusted pointer. - * @src: bpf_dynptr to the plain data. Must be a trusted pointer. - * @dst: bpf_dynptr to buffer where to store the result. Must be a trusted pointer. - * @siv: bpf_dynptr to IV data and state data to be used by decryptor. + * @ctx: The crypto context being used. The ctx must be a trusted pointer. + * @src: bpf_dynptr to the plain data. Must be a trusted pointer. + * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer. + * @siv__nullable: bpf_dynptr to IV data and state data to be used by decryptor. May be NULL. * * Encrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv__nullable) { - return bpf_crypto_crypt(ctx, src, dst, siv, false); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv__nullable; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, false); } __bpf_kfunc_end_defs(); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b18d4a14a0a7..9e0e3b0a18e4 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -106,7 +106,7 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; } -static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) +static int dev_map_alloc_check(union bpf_attr *attr) { u32 valsize = attr->value_size; @@ -120,23 +120,28 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) attr->map_flags & ~DEV_CREATE_FLAG_MASK) return -EINVAL; + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + /* Hash table size must be power of 2; roundup_pow_of_two() + * can overflow into UB on 32-bit arches + */ + if (attr->max_entries > 1UL << 31) + return -EINVAL; + } + + return 0; +} + +static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) +{ /* Lookup returns a pointer straight to dev->ifindex, so make sure the * verifier prevents writes from the BPF side */ attr->map_flags |= BPF_F_RDONLY_PROG; - - bpf_map_init_from_attr(&dtab->map, attr); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - /* hash table size must be power of 2; roundup_pow_of_two() can - * overflow into UB on 32-bit arches, so check that first - */ - if (dtab->map.max_entries > 1UL << 31) - return -EINVAL; - + /* Hash table size must be power of 2 */ dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); - dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) @@ -1036,6 +1041,7 @@ static u64 dev_map_mem_usage(const struct bpf_map *map) BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab) const struct bpf_map_ops dev_map_ops = { .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = dev_map_alloc_check, .map_alloc = dev_map_alloc, .map_free = dev_map_free, .map_get_next_key = dev_map_get_next_key, @@ -1050,6 +1056,7 @@ const struct bpf_map_ops dev_map_ops = { const struct bpf_map_ops dev_map_hash_ops = { .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = dev_map_alloc_check, .map_alloc = dev_map_alloc, .map_free = dev_map_free, .map_get_next_key = dev_map_hash_get_next_key, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6f1abcb4b084..5241ba671c5a 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2433,7 +2433,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) /** * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. - * @ptr: The dynptr whose data slice to retrieve + * @p: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr * @buffer__opt: User-provided buffer to copy contents into. May be NULL * @buffer__szk: Size (in bytes) of the buffer if present. This is the @@ -2459,9 +2459,10 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; u32 len = buffer__szk; int err; @@ -2503,7 +2504,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset /** * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data. - * @ptr: The dynptr whose data slice to retrieve + * @p: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr * @buffer__opt: User-provided buffer to copy contents into. May be NULL * @buffer__szk: Size (in bytes) of the buffer if present. This is the @@ -2543,9 +2544,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) return NULL; @@ -2571,11 +2574,12 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. */ - return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); + return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u32 size; if (!ptr->data || start > end) @@ -2592,36 +2596,45 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en return 0; } -__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + return !ptr->data; } -__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return false; return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return -EINVAL; return __bpf_dynptr_size(ptr); } -__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, - struct bpf_dynptr_kern *clone__uninit) +__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, + struct bpf_dynptr *clone__uninit) { + struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit; + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) { - bpf_dynptr_set_null(clone__uninit); + bpf_dynptr_set_null(clone); return -EINVAL; } - *clone__uninit = *ptr; + *clone = *ptr; return 0; } @@ -2721,7 +2734,7 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) } __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *value), unsigned int flags, void *aux__ign) { @@ -2986,7 +2999,9 @@ late_initcall(kfunc_init); */ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) { - return bpf_dynptr_slice(ptr, 0, NULL, len); + const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; + + return bpf_dynptr_slice(p, 0, NULL, len); } /* Get a pointer to dynptr data up to len bytes for read write access. If diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 4bd8f17a9f24..5aebfc3051e3 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -91,7 +91,7 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, goto fail; } else { u64 new_end, new_start; - u32 buf_start, buf_end, new_n; + u32 buf_start, buf_end; new_end = log->end_pos + n; if (new_end - log->start_pos >= log->len_total) @@ -708,7 +708,9 @@ static void print_reg_state(struct bpf_verifier_env *env, verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); verbose(env, "("); if (reg->id) - verbose_a("id=%d", reg->id); + verbose_a("id=%d", reg->id & ~BPF_ADD_CONST); + if (reg->id & BPF_ADD_CONST) + verbose(env, "%+d", reg->off); if (reg->ref_obj_id) verbose_a("ref_obj_id=%d", reg->ref_obj_id); if (type_is_non_owning_ref(reg->type)) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index ec4e97c61eef..02aa9db8d796 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -261,6 +261,7 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) u32 saved_tid = info->tid; struct task_struct *curr_task; unsigned int curr_fd = info->fd; + struct file *f; /* If this function returns a non-NULL file object, * it held a reference to the task/file. @@ -286,12 +287,8 @@ again: } rcu_read_lock(); - for (;; curr_fd++) { - struct file *f; - f = task_lookup_next_fdget_rcu(curr_task, &curr_fd); - if (!f) - break; - + f = task_lookup_next_fdget_rcu(curr_task, &curr_fd); + if (f) { /* set info->fd */ info->fd = curr_fd; rcu_read_unlock(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fcecaba8668d..3d6306c363b7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2982,8 +2982,10 @@ static int check_subprogs(struct bpf_verifier_env *env) if (code == (BPF_JMP | BPF_CALL) && insn[i].src_reg == 0 && - insn[i].imm == BPF_FUNC_tail_call) + insn[i].imm == BPF_FUNC_tail_call) { subprog[cur_subprog].has_tail_call = true; + subprog[cur_subprog].tail_call_reachable = true; + } if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) subprog[cur_subprog].has_ld_abs = true; @@ -3215,7 +3217,8 @@ static int insn_def_regno(const struct bpf_insn *insn) case BPF_ST: return -1; case BPF_STX: - if (BPF_MODE(insn->code) == BPF_ATOMIC && + if ((BPF_MODE(insn->code) == BPF_ATOMIC || + BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) && (insn->imm & BPF_FETCH)) { if (insn->imm == BPF_CMPXCHG) return BPF_REG_0; @@ -3991,7 +3994,7 @@ static bool idset_contains(struct bpf_idset *s, u32 id) u32 i; for (i = 0; i < s->count; ++i) - if (s->ids[i] == id) + if (s->ids[i] == (id & ~BPF_ADD_CONST)) return true; return false; @@ -4001,7 +4004,7 @@ static int idset_push(struct bpf_idset *s, u32 id) { if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) return -EFAULT; - s->ids[s->count++] = id; + s->ids[s->count++] = id & ~BPF_ADD_CONST; return 0; } @@ -4438,8 +4441,20 @@ static bool __is_pointer_value(bool allow_ptr_leaks, static void assign_scalar_id_before_mov(struct bpf_verifier_env *env, struct bpf_reg_state *src_reg) { - if (src_reg->type == SCALAR_VALUE && !src_reg->id && - !tnum_is_const(src_reg->var_off)) + if (src_reg->type != SCALAR_VALUE) + return; + + if (src_reg->id & BPF_ADD_CONST) { + /* + * The verifier is processing rX = rY insn and + * rY->id has special linked register already. + * Cleared it, since multiple rX += const are not supported. + */ + src_reg->id = 0; + src_reg->off = 0; + } + + if (!src_reg->id && !tnum_is_const(src_reg->var_off)) /* Ensure that src_reg has a valid ID that will be copied to * dst_reg and then will be used by find_equal_scalars() to * propagate min/max range. @@ -7715,6 +7730,13 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; int err; + if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { + verbose(env, + "arg#%d expected pointer to stack or const struct bpf_dynptr\n", + regno); + return -EINVAL; + } + /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): */ @@ -9464,6 +9486,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; } } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR); + if (ret) + return ret; + ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0); if (ret) return ret; @@ -10917,7 +10943,7 @@ enum { }; BTF_ID_LIST(kf_arg_btf_ids) -BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_dynptr) BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) BTF_ID(struct, bpf_rb_root) @@ -11190,6 +11216,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) return KF_ARG_PTR_TO_CTX; + if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg)) + return KF_ARG_PTR_TO_NULL; + if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) return KF_ARG_PTR_TO_ALLOC_BTF_ID; @@ -11235,9 +11264,6 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_callback(env, meta->btf, &args[argno])) return KF_ARG_PTR_TO_CALLBACK; - if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg)) - return KF_ARG_PTR_TO_NULL; - if (argno + 1 < nargs && (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]) || is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]))) @@ -11268,6 +11294,8 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, bool strict_type_match = false; const struct btf *reg_btf; const char *reg_ref_tname; + bool taking_projection; + bool struct_same; u32 reg_ref_id; if (base_type(reg->type) == PTR_TO_BTF_ID) { @@ -11311,7 +11339,13 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); - if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { + struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match); + /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot + * actually use it -- it must cast to the underlying type. So we allow + * caller to pass in the underlying type. + */ + taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); + if (!taking_projection && !struct_same) { verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, btf_type_str(reg_ref_t), reg_ref_tname); @@ -11957,12 +11991,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; int clone_ref_obj_id = 0; - if (reg->type != PTR_TO_STACK && - reg->type != CONST_PTR_TO_DYNPTR) { - verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i); - return -EINVAL; - } - if (reg->type == CONST_PTR_TO_DYNPTR) dynptr_arg_type |= MEM_RDONLY; @@ -14047,6 +14075,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; + bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); u8 opcode = BPF_OP(insn->code); int err; @@ -14069,11 +14098,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, if (dst_reg->type != SCALAR_VALUE) ptr_reg = dst_reg; - else - /* Make sure ID is cleared otherwise dst_reg min/max could be - * incorrectly propagated into other registers by find_equal_scalars() - */ - dst_reg->id = 0; + if (BPF_SRC(insn->code) == BPF_X) { src_reg = ®s[insn->src_reg]; if (src_reg->type != SCALAR_VALUE) { @@ -14137,7 +14162,43 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } - return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + if (err) + return err; + /* + * Compilers can generate the code + * r1 = r2 + * r1 += 0x1 + * if r2 < 1000 goto ... + * use r1 in memory access + * So remember constant delta between r2 and r1 and update r1 after + * 'if' condition. + */ + if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD && + dst_reg->id && is_reg_const(src_reg, alu32)) { + u64 val = reg_const_value(src_reg, alu32); + + if ((dst_reg->id & BPF_ADD_CONST) || + /* prevent overflow in find_equal_scalars() later */ + val > (u32)S32_MAX) { + /* + * If the register already went through rX += val + * we cannot accumulate another val into rx->off. + */ + dst_reg->off = 0; + dst_reg->id = 0; + } else { + dst_reg->id |= BPF_ADD_CONST; + dst_reg->off = val; + } + } else { + /* + * Make sure ID is cleared otherwise dst_reg min/max could be + * incorrectly propagated into other registers by find_equal_scalars() + */ + dst_reg->id = 0; + } + return 0; } /* check validity of 32-bit and 64-bit arithmetic operations */ @@ -15109,12 +15170,36 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg) { + struct bpf_reg_state fake_reg; struct bpf_func_state *state; struct bpf_reg_state *reg; bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + if (reg->type != SCALAR_VALUE || reg == known_reg) + continue; + if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) + continue; + if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || + reg->off == known_reg->off) { + copy_register_state(reg, known_reg); + } else { + s32 saved_off = reg->off; + + fake_reg.type = SCALAR_VALUE; + __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off); + + /* reg = known_reg; reg += delta */ copy_register_state(reg, known_reg); + /* + * Must preserve off, id and add_const flag, + * otherwise another find_equal_scalars() will be incorrect. + */ + reg->off = saved_off; + + scalar32_min_max_add(reg, &fake_reg); + scalar_min_max_add(reg, &fake_reg); + reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); + } })); } @@ -16749,6 +16834,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } if (!rold->precise && exact == NOT_EXACT) return true; + if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) + return false; + if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off)) + return false; /* Why check_ids() for scalar registers? * * Consider the following BPF code: @@ -18630,8 +18719,7 @@ static void release_maps(struct bpf_verifier_env *env) /* drop refcnt of maps used by the rejected program */ static void release_btfs(struct bpf_verifier_env *env) { - __bpf_free_used_btfs(env->prog->aux, env->used_btfs, - env->used_btf_cnt); + __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt); } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ |