diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 1531 |
1 files changed, 1414 insertions, 117 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 648a549c41ae..4e7f1d085e53 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -451,10 +451,24 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON; } +static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) +{ + struct btf_record *rec = NULL; + struct btf_struct_meta *meta; + + if (reg->type == PTR_TO_MAP_VALUE) { + rec = reg->map_ptr->record; + } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) { + meta = btf_find_struct_meta(reg->btf, reg->btf_id); + if (meta) + rec = meta->record; + } + return rec; +} + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { - return reg->type == PTR_TO_MAP_VALUE && - btf_record_has_field(reg->map_ptr->record, BPF_SPIN_LOCK); + return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK); } static bool type_is_rdonly_mem(u32 type) @@ -513,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id) func_id == BPF_FUNC_user_ringbuf_drain; } +static bool is_storage_get_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_sk_storage_get || + func_id == BPF_FUNC_inode_storage_get || + func_id == BPF_FUNC_task_storage_get || + func_id == BPF_FUNC_cgrp_storage_get; +} + static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, const struct bpf_map *map) { @@ -543,7 +565,7 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) { - char postfix[16] = {0}, prefix[32] = {0}; + char postfix[16] = {0}, prefix[64] = {0}; static const char * const str[] = { [NOT_INIT] = "?", [SCALAR_VALUE] = "scalar", @@ -575,16 +597,15 @@ static const char *reg_type_str(struct bpf_verifier_env *env, strncpy(postfix, "_or_null", 16); } - if (type & MEM_RDONLY) - strncpy(prefix, "rdonly_", 32); - if (type & MEM_ALLOC) - strncpy(prefix, "alloc_", 32); - if (type & MEM_USER) - strncpy(prefix, "user_", 32); - if (type & MEM_PERCPU) - strncpy(prefix, "percpu_", 32); - if (type & PTR_UNTRUSTED) - strncpy(prefix, "untrusted_", 32); + snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s", + type & MEM_RDONLY ? "rdonly_" : "", + type & MEM_RINGBUF ? "ringbuf_" : "", + type & MEM_USER ? "user_" : "", + type & MEM_PERCPU ? "percpu_" : "", + type & MEM_RCU ? "rcu_" : "", + type & PTR_UNTRUSTED ? "untrusted_" : "", + type & PTR_TRUSTED ? "trusted_" : "" + ); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -1010,9 +1031,9 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (ksize(dst) < bytes) { + if (ksize(dst) < ksize(src)) { kfree(dst); - dst = kmalloc_track_caller(bytes, flags); + dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); if (!dst) return NULL; } @@ -1029,12 +1050,14 @@ out: */ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) { + size_t alloc_size; void *new_arr; if (!new_n || old_n == new_n) goto out; - new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + alloc_size = kmalloc_size_roundup(size_mul(new_n, size)); + new_arr = krealloc(arr, alloc_size, GFP_KERNEL); if (!new_arr) { kfree(arr); return NULL; @@ -1206,8 +1229,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->frame[i] = NULL; } dst_state->speculative = src->speculative; + dst_state->active_rcu_lock = src->active_rcu_lock; dst_state->curframe = src->curframe; - dst_state->active_spin_lock = src->active_spin_lock; + dst_state->active_lock.ptr = src->active_lock.ptr; + dst_state->active_lock.id = src->active_lock.id; dst_state->branches = src->branches; dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; @@ -2506,9 +2531,11 @@ static int push_jmp_history(struct bpf_verifier_env *env, { u32 cnt = cur->jmp_history_cnt; struct bpf_idx_pair *p; + size_t alloc_size; cnt++; - p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); + alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); + p = krealloc(cur->jmp_history, alloc_size, GFP_USER); if (!p) return -ENOMEM; p[cnt - 1].idx = env->insn_idx; @@ -3844,7 +3871,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno) { const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); - int perm_flags = PTR_MAYBE_NULL; + int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED; const char *reg_name = ""; /* Only unreferenced case accepts untrusted pointers */ @@ -4241,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_FLOW_KEYS; } +static bool is_trusted_reg(const struct bpf_reg_state *reg) +{ + /* A referenced register is always trusted. */ + if (reg->ref_obj_id) + return true; + + /* If a register is not referenced, it is trusted if it has the + * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the + * other type modifiers may be safe, but we elect to take an opt-in + * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are + * not. + * + * Eventually, we should make PTR_TRUSTED the single source of truth + * for whether a register is trusted. + */ + return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS && + !bpf_type_has_unsafe_modifiers(reg->type); +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -4687,17 +4733,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } - if (env->ops->btf_struct_access) { - ret = env->ops->btf_struct_access(&env->log, reg->btf, t, - off, size, atype, &btf_id, &flag); + if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) { + if (!btf_is_kernel(reg->btf)) { + verbose(env, "verifier internal error: reg->btf must be kernel btf\n"); + return -EFAULT; + } + ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag); } else { - if (atype != BPF_READ) { + /* Writes are permitted with default btf_struct_access for + * program allocated objects (which always have ref_obj_id > 0), + * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. + */ + if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { verbose(env, "only read is supported\n"); return -EACCES; } - ret = btf_struct_access(&env->log, reg->btf, t, off, size, - atype, &btf_id, &flag); + if (type_is_alloc(reg->type) && !reg->ref_obj_id) { + verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n"); + return -EFAULT; + } + + ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag); } if (ret < 0) @@ -4709,6 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (type_flag(reg->type) & PTR_UNTRUSTED) flag |= PTR_UNTRUSTED; + /* By default any pointer obtained from walking a trusted pointer is + * no longer trusted except the rcu case below. + */ + flag &= ~PTR_TRUSTED; + + if (flag & MEM_RCU) { + /* Mark value register as MEM_RCU only if it is protected by + * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU + * itself can already indicate trustedness inside the rcu + * read lock region. Also mark it as PTR_TRUSTED. + */ + if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg)) + flag &= ~MEM_RCU; + else + flag |= PTR_TRUSTED; + } else if (reg->type & MEM_RCU) { + /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged + * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively. + */ + flag |= PTR_UNTRUSTED; + } + if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); @@ -4723,6 +4802,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, { struct bpf_reg_state *reg = regs + regno; struct bpf_map *map = reg->map_ptr; + struct bpf_reg_state map_reg; enum bpf_type_flag flag = 0; const struct btf_type *t; const char *tname; @@ -4761,7 +4841,10 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return -EACCES; } - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag); + /* Simulate access to a PTR_TO_BTF_ID */ + memset(&map_reg, 0, sizeof(map_reg)); + mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0); + ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag); if (ret < 0) return ret; @@ -5520,8 +5603,8 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return err; } -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno) +static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) { struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; bool may_be_null = type_may_be_null(mem_reg->type); @@ -5549,23 +5632,26 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state } /* Implementation details: - * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL + * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL. + * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL. * Two bpf_map_lookups (even with the same key) will have different reg->id. - * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after - * value_or_null->value transition, since the verifier only cares about - * the range of access to valid map value pointer and doesn't care about actual - * address of the map element. + * Two separate bpf_obj_new will also have different reg->id. + * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier + * clears reg->id after value_or_null->value transition, since the verifier only + * cares about the range of access to valid map value pointer and doesn't care + * about actual address of the map element. * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps * reg->id > 0 after value_or_null->value transition. By doing so * two bpf_map_lookups will be considered two different pointers that - * point to different bpf_spin_locks. + * point to different bpf_spin_locks. Likewise for pointers to allocated objects + * returned from bpf_obj_new. * The verifier allows taking only one bpf_spin_lock at a time to avoid * dead-locks. * Since only one bpf_spin_lock is allowed the checks are simpler than * reg_is_refcounted() logic. The verifier needs to remember only * one spin_lock instead of array of acquired_refs. - * cur_state->active_spin_lock remembers which map value element got locked - * and clears it after bpf_spin_unlock. + * cur_state->active_lock remembers which map value element or allocated + * object got locked and clears it after bpf_spin_unlock. */ static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock) @@ -5573,8 +5659,10 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; struct bpf_verifier_state *cur = env->cur_state; bool is_const = tnum_is_const(reg->var_off); - struct bpf_map *map = reg->map_ptr; u64 val = reg->var_off.value; + struct bpf_map *map = NULL; + struct btf *btf = NULL; + struct btf_record *rec; if (!is_const) { verbose(env, @@ -5582,38 +5670,78 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, regno); return -EINVAL; } - if (!map->btf) { - verbose(env, - "map '%s' has to have BTF in order to use bpf_spin_lock\n", - map->name); - return -EINVAL; + if (reg->type == PTR_TO_MAP_VALUE) { + map = reg->map_ptr; + if (!map->btf) { + verbose(env, + "map '%s' has to have BTF in order to use bpf_spin_lock\n", + map->name); + return -EINVAL; + } + } else { + btf = reg->btf; } - if (!btf_record_has_field(map->record, BPF_SPIN_LOCK)) { - verbose(env, "map '%s' has no valid bpf_spin_lock\n", map->name); + + rec = reg_btf_record(reg); + if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) { + verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local", + map ? map->name : "kptr"); return -EINVAL; } - if (map->record->spin_lock_off != val + reg->off) { + if (rec->spin_lock_off != val + reg->off) { verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n", - val + reg->off, map->record->spin_lock_off); + val + reg->off, rec->spin_lock_off); return -EINVAL; } if (is_lock) { - if (cur->active_spin_lock) { + if (cur->active_lock.ptr) { verbose(env, "Locking two bpf_spin_locks are not allowed\n"); return -EINVAL; } - cur->active_spin_lock = reg->id; + if (map) + cur->active_lock.ptr = map; + else + cur->active_lock.ptr = btf; + cur->active_lock.id = reg->id; } else { - if (!cur->active_spin_lock) { + struct bpf_func_state *fstate = cur_func(env); + void *ptr; + int i; + + if (map) + ptr = map; + else + ptr = btf; + + if (!cur->active_lock.ptr) { verbose(env, "bpf_spin_unlock without taking a lock\n"); return -EINVAL; } - if (cur->active_spin_lock != reg->id) { + if (cur->active_lock.ptr != ptr || + cur->active_lock.id != reg->id) { verbose(env, "bpf_spin_unlock of different lock\n"); return -EINVAL; } - cur->active_spin_lock = 0; + cur->active_lock.ptr = NULL; + cur->active_lock.id = 0; + + for (i = 0; i < fstate->acquired_refs; i++) { + int err; + + /* Complain on error because this reference state cannot + * be freed before this point, as bpf_spin_lock critical + * section does not allow functions that release the + * allocated object immediately. + */ + if (!fstate->refs[i].release_on_unlock) + continue; + err = release_reference(env, fstate->refs[i].id); + if (err) { + verbose(env, "failed to release release_on_unlock reference"); + return err; + } + } } return 0; } @@ -5772,6 +5900,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = { PTR_TO_TCP_SOCK, PTR_TO_XDP_SOCK, PTR_TO_BTF_ID, + PTR_TO_BTF_ID | PTR_TRUSTED, }, .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], }; @@ -5785,7 +5914,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, - PTR_TO_MEM | MEM_ALLOC, + PTR_TO_MEM | MEM_RINGBUF, PTR_TO_BUF, }, }; @@ -5800,14 +5929,31 @@ static const struct bpf_reg_types int_ptr_types = { }, }; +static const struct bpf_reg_types spin_lock_types = { + .types = { + PTR_TO_MAP_VALUE, + PTR_TO_BTF_ID | MEM_ALLOC, + } +}; + static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; +static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; -static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; -static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } }; +static const struct bpf_reg_types btf_ptr_types = { + .types = { + PTR_TO_BTF_ID, + PTR_TO_BTF_ID | PTR_TRUSTED, + PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED, + }, +}; +static const struct bpf_reg_types percpu_btf_ptr_types = { + .types = { + PTR_TO_BTF_ID | MEM_PERCPU, + PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, + } +}; static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5836,7 +5982,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, + [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, @@ -5895,7 +6041,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, return -EACCES; found: - if (reg->type == PTR_TO_BTF_ID) { + if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) { /* For bpf_sk_release, it needs to match against first member * 'struct sock_common', hence make an exception for it. This * allows bpf_sk_release to work for multiple socket types. @@ -5931,6 +6077,11 @@ found: return -EACCES; } } + } else if (type_is_alloc(reg->type)) { + if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) { + verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); + return -EFAULT; + } } return 0; @@ -5957,20 +6108,24 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MAP_VALUE: case PTR_TO_MEM: case PTR_TO_MEM | MEM_RDONLY: - case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_MEM | MEM_RINGBUF: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ - if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM) + if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM) return 0; break; /* All the rest must be rejected, except PTR_TO_BTF_ID which allows * fixed offset. */ case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: /* When referenced PTR_TO_BTF_ID is passed to release function, * it's fixed offset must be 0. In the other cases, fixed offset * can be non-zero. @@ -6046,7 +6201,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, goto skip_type_check; /* arg_btf_id and arg_size are in a union. */ - if (base_type(arg_type) == ARG_PTR_TO_BTF_ID) + if (base_type(arg_type) == ARG_PTR_TO_BTF_ID || + base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK) arg_btf_id = fn->arg_btf_id[arg]; err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); @@ -6664,9 +6820,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) int i; for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { - if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) - return false; - + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID) + return !!fn->arg_btf_id[i]; + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK) + return fn->arg_btf_id[i] == BPF_PTR_POISON; if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && /* arg_btf_id and arg_size are in a union. */ (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || @@ -7413,6 +7570,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } + if (!env->prog->aux->sleepable && fn->might_sleep) { + verbose(env, "helper call might sleep in a non-sleepable prog\n"); + return -EINVAL; + } + /* With LD_ABS/IND some JITs save/restore skb from r1. */ changes_data = bpf_helper_changes_pkt_data(fn->func); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { @@ -7431,6 +7593,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } + if (env->cur_state->active_rcu_lock) { + if (fn->might_sleep) { + verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n", + func_id_name(func_id), func_id); + return -EINVAL; + } + + if (env->prog->aux->sleepable && is_storage_get_function(func_id)) + env->insn_aux_data[insn_idx].storage_get_func_atomic = true; + } + meta.func_id = func_id; /* check args */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { @@ -7634,7 +7807,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; break; - case RET_PTR_TO_ALLOC_MEM: + case RET_PTR_TO_MEM: mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = meta.mem_size; @@ -7797,19 +7970,921 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } +struct bpf_kfunc_call_arg_meta { + /* In parameters */ + struct btf *btf; + u32 func_id; + u32 kfunc_flags; + const struct btf_type *func_proto; + const char *func_name; + /* Out parameters */ + u32 ref_obj_id; + u8 release_regno; + bool r0_rdonly; + u32 ret_btf_id; + u64 r0_size; + struct { + u64 value; + bool found; + } arg_constant; + struct { + struct btf *btf; + u32 btf_id; + } arg_obj_drop; + struct { + struct btf_field *field; + } arg_list_head; +}; + +static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ACQUIRE; +} + +static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RET_NULL; +} + +static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RELEASE; +} + +static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_TRUSTED_ARGS; +} + +static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_SLEEPABLE; +} + +static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_DESTRUCTIVE; +} + +static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg) +{ + return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET); +} + +static bool __kfunc_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix) +{ + int suffix_len = strlen(suffix), len; + const char *param_name; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len < suffix_len) + return false; + param_name += len - suffix_len; + return !strncmp(param_name, suffix, suffix_len); +} + +static bool is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + const struct btf_type *t; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + return __kfunc_param_match_suffix(btf, arg, "__sz"); +} + +static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__k"); +} + +static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__ign"); +} + +static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__alloc"); +} + +static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, + const struct btf_param *arg, + const char *name) +{ + int len, target_len = strlen(name); + const char *param_name; + + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len != target_len) + return false; + if (strcmp(param_name, name)) + return false; + + return true; +} + +enum { + KF_ARG_DYNPTR_ID, + KF_ARG_LIST_HEAD_ID, + KF_ARG_LIST_NODE_ID, +}; + +BTF_ID_LIST(kf_arg_btf_ids) +BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_list_head) +BTF_ID(struct, bpf_list_node) + +static bool __is_kfunc_ptr_arg_type(const struct btf *btf, + const struct btf_param *arg, int type) +{ + const struct btf_type *t; + u32 res_id; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t) + return false; + if (!btf_type_is_ptr(t)) + return false; + t = btf_type_skip_modifiers(btf, t->type, &res_id); + if (!t) + return false; + return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]); +} + +static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID); +} + +static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID); +} + +static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID); +} + +/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ +static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env, + const struct btf *btf, + const struct btf_type *t, int rec) +{ + const struct btf_type *member_type; + const struct btf_member *member; + u32 i; + + if (!btf_type_is_struct(t)) + return false; + + for_each_member(i, t, member) { + const struct btf_array *array; + + member_type = btf_type_skip_modifiers(btf, member->type, NULL); + if (btf_type_is_struct(member_type)) { + if (rec >= 3) { + verbose(env, "max struct nesting depth exceeded\n"); + return false; + } + if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1)) + return false; + continue; + } + if (btf_type_is_array(member_type)) { + array = btf_array(member_type); + if (!array->nelems) + return false; + member_type = btf_type_skip_modifiers(btf, array->type, NULL); + if (!btf_type_is_scalar(member_type)) + return false; + continue; + } + if (!btf_type_is_scalar(member_type)) + return false; + } + return true; +} + + +static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { +#ifdef CONFIG_NET + [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], + [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], + [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], +#endif +}; + +enum kfunc_ptr_arg_type { + KF_ARG_PTR_TO_CTX, + KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ + KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ + KF_ARG_PTR_TO_DYNPTR, + KF_ARG_PTR_TO_LIST_HEAD, + KF_ARG_PTR_TO_LIST_NODE, + KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ + KF_ARG_PTR_TO_MEM, + KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ +}; + +enum special_kfunc_type { + KF_bpf_obj_new_impl, + KF_bpf_obj_drop_impl, + KF_bpf_list_push_front, + KF_bpf_list_push_back, + KF_bpf_list_pop_front, + KF_bpf_list_pop_back, + KF_bpf_cast_to_kern_ctx, + KF_bpf_rdonly_cast, + KF_bpf_rcu_read_lock, + KF_bpf_rcu_read_unlock, +}; + +BTF_SET_START(special_kfunc_set) +BTF_ID(func, bpf_obj_new_impl) +BTF_ID(func, bpf_obj_drop_impl) +BTF_ID(func, bpf_list_push_front) +BTF_ID(func, bpf_list_push_back) +BTF_ID(func, bpf_list_pop_front) +BTF_ID(func, bpf_list_pop_back) +BTF_ID(func, bpf_cast_to_kern_ctx) +BTF_ID(func, bpf_rdonly_cast) +BTF_SET_END(special_kfunc_set) + +BTF_ID_LIST(special_kfunc_list) +BTF_ID(func, bpf_obj_new_impl) +BTF_ID(func, bpf_obj_drop_impl) +BTF_ID(func, bpf_list_push_front) +BTF_ID(func, bpf_list_push_back) +BTF_ID(func, bpf_list_pop_front) +BTF_ID(func, bpf_list_pop_back) +BTF_ID(func, bpf_cast_to_kern_ctx) +BTF_ID(func, bpf_rdonly_cast) +BTF_ID(func, bpf_rcu_read_lock) +BTF_ID(func, bpf_rcu_read_unlock) + +static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock]; +} + +static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock]; +} + +static enum kfunc_ptr_arg_type +get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, + struct bpf_kfunc_call_arg_meta *meta, + const struct btf_type *t, const struct btf_type *ref_t, + const char *ref_tname, const struct btf_param *args, + int argno, int nargs) +{ + u32 regno = argno + 1; + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + bool arg_mem_size = false; + + if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) + return KF_ARG_PTR_TO_CTX; + + /* In this function, we verify the kfunc's BTF as per the argument type, + * leaving the rest of the verification with respect to the register + * type to our caller. When a set of conditions hold in the BTF type of + * arguments, we resolve it to a known kfunc_ptr_arg_type. + */ + if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) + return KF_ARG_PTR_TO_CTX; + + if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_ALLOC_BTF_ID; + + if (is_kfunc_arg_kptr_get(meta, argno)) { + if (!btf_type_is_ptr(ref_t)) { + verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n"); + return -EINVAL; + } + ref_t = btf_type_by_id(meta->btf, ref_t->type); + ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off); + if (!btf_type_is_struct(ref_t)) { + verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n", + meta->func_name, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + return KF_ARG_PTR_TO_KPTR; + } + + if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_DYNPTR; + + if (is_kfunc_arg_list_head(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_LIST_HEAD; + + if (is_kfunc_arg_list_node(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_LIST_NODE; + + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { + if (!btf_type_is_struct(ref_t)) { + verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", + meta->func_name, argno, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + return KF_ARG_PTR_TO_BTF_ID; + } + + if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1])) + arg_mem_size = true; + + /* This is the catch all argument type of register types supported by + * check_helper_mem_access. However, we only allow when argument type is + * pointer to scalar, or struct composed (recursively) of scalars. When + * arg_mem_size is true, the pointer can be void *. + */ + if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) && + (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { + verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", + argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); + return -EINVAL; + } + return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM; +} + +static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + const struct btf_type *ref_t, + const char *ref_tname, u32 ref_id, + struct bpf_kfunc_call_arg_meta *meta, + int argno) +{ + const struct btf_type *reg_ref_t; + bool strict_type_match = false; + const struct btf *reg_btf; + const char *reg_ref_tname; + u32 reg_ref_id; + + if (base_type(reg->type) == PTR_TO_BTF_ID) { + reg_btf = reg->btf; + reg_ref_id = reg->btf_id; + } else { + reg_btf = btf_vmlinux; + reg_ref_id = *reg2btf_ids[base_type(reg->type)]; + } + + if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id)) + strict_type_match = true; + + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); + reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); + if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { + verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", + meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, + btf_type_str(reg_ref_t), reg_ref_tname); + return -EINVAL; + } + return 0; +} + +static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + const struct btf_type *ref_t, + const char *ref_tname, + struct bpf_kfunc_call_arg_meta *meta, + int argno) +{ + struct btf_field *kptr_field; + + /* check_func_arg_reg_off allows var_off for + * PTR_TO_MAP_VALUE, but we need fixed offset to find + * off_desc. + */ + if (!tnum_is_const(reg->var_off)) { + verbose(env, "arg#0 must have constant offset\n"); + return -EINVAL; + } + + kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR); + if (!kptr_field || kptr_field->type != BPF_KPTR_REF) { + verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n", + reg->off + reg->var_off.value); + return -EINVAL; + } + + if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf, + kptr_field->kptr.btf_id, true)) { + verbose(env, "kernel function %s args#%d expected pointer to %s %s\n", + meta->func_name, argno, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + return 0; +} + +static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id) +{ + struct bpf_func_state *state = cur_func(env); + struct bpf_reg_state *reg; + int i; + + /* bpf_spin_lock only allows calling list_push and list_pop, no BPF + * subprogs, no global functions. This means that the references would + * not be released inside the critical section but they may be added to + * the reference state, and the acquired_refs are never copied out for a + * different frame as BPF to BPF calls don't work in bpf_spin_lock + * critical sections. + */ + if (!ref_obj_id) { + verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n"); + return -EFAULT; + } + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].id == ref_obj_id) { + if (state->refs[i].release_on_unlock) { + verbose(env, "verifier internal error: expected false release_on_unlock"); + return -EFAULT; + } + state->refs[i].release_on_unlock = true; + /* Now mark everyone sharing same ref_obj_id as untrusted */ + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) + reg->type |= PTR_UNTRUSTED; + })); + return 0; + } + } + verbose(env, "verifier internal error: ref state missing for ref_obj_id\n"); + return -EFAULT; +} + +/* Implementation details: + * + * Each register points to some region of memory, which we define as an + * allocation. Each allocation may embed a bpf_spin_lock which protects any + * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same + * allocation. The lock and the data it protects are colocated in the same + * memory region. + * + * Hence, everytime a register holds a pointer value pointing to such + * allocation, the verifier preserves a unique reg->id for it. + * + * The verifier remembers the lock 'ptr' and the lock 'id' whenever + * bpf_spin_lock is called. + * + * To enable this, lock state in the verifier captures two values: + * active_lock.ptr = Register's type specific pointer + * active_lock.id = A unique ID for each register pointer value + * + * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two + * supported register types. + * + * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of + * allocated objects is the reg->btf pointer. + * + * The active_lock.id is non-unique for maps supporting direct_value_addr, as we + * can establish the provenance of the map value statically for each distinct + * lookup into such maps. They always contain a single map value hence unique + * IDs for each pseudo load pessimizes the algorithm and rejects valid programs. + * + * So, in case of global variables, they use array maps with max_entries = 1, + * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point + * into the same map value as max_entries is 1, as described above). + * + * In case of inner map lookups, the inner map pointer has same map_ptr as the + * outer map pointer (in verifier context), but each lookup into an inner map + * assigns a fresh reg->id to the lookup, so while lookups into distinct inner + * maps from the same outer map share the same map_ptr as active_lock.ptr, they + * will get different reg->id assigned to each lookup, hence different + * active_lock.id. + * + * In case of allocated objects, active_lock.ptr is the reg->btf, and the + * reg->id is a unique ID preserved after the NULL pointer check on the pointer + * returned from bpf_obj_new. Each allocation receives a new reg->id. + */ +static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + void *ptr; + u32 id; + + switch ((int)reg->type) { + case PTR_TO_MAP_VALUE: + ptr = reg->map_ptr; + break; + case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: + ptr = reg->btf; + break; + default: + verbose(env, "verifier internal error: unknown reg type for lock check\n"); + return -EFAULT; + } + id = reg->id; + + if (!env->cur_state->active_lock.ptr) + return -EINVAL; + if (env->cur_state->active_lock.ptr != ptr || + env->cur_state->active_lock.id != id) { + verbose(env, "held lock and object are not in the same allocation\n"); + return -EINVAL; + } + return 0; +} + +static bool is_bpf_list_api_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_list_push_front] || + btf_id == special_kfunc_list[KF_bpf_list_push_back] || + btf_id == special_kfunc_list[KF_bpf_list_pop_front] || + btf_id == special_kfunc_list[KF_bpf_list_pop_back]; +} + +static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct btf_field *field; + struct btf_record *rec; + u32 list_head_off; + + if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) { + verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n"); + return -EFAULT; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n", + regno); + return -EINVAL; + } + + rec = reg_btf_record(reg); + list_head_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD); + if (!field) { + verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off); + return -EINVAL; + } + + /* All functions require bpf_list_head to be protected using a bpf_spin_lock */ + if (check_reg_allocation_locked(env, reg)) { + verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n", + rec->spin_lock_off); + return -EINVAL; + } + + if (meta->arg_list_head.field) { + verbose(env, "verifier internal error: repeating bpf_list_head arg\n"); + return -EFAULT; + } + meta->arg_list_head.field = field; + return 0; +} + +static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + const struct btf_type *et, *t; + struct btf_field *field; + struct btf_record *rec; + u32 list_node_off; + + if (meta->btf != btf_vmlinux || + (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] && + meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) { + verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n"); + return -EFAULT; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n", + regno); + return -EINVAL; + } + + rec = reg_btf_record(reg); + list_node_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, list_node_off, BPF_LIST_NODE); + if (!field || field->offset != list_node_off) { + verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off); + return -EINVAL; + } + + field = meta->arg_list_head.field; + + et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id); + t = btf_type_by_id(reg->btf, reg->btf_id); + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf, + field->list_head.value_btf_id, true)) { + verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d " + "in struct %s, but arg is at offset=%d in struct %s\n", + field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off), + list_node_off, btf_name_by_offset(reg->btf, t->name_off)); + return -EINVAL; + } + + if (list_node_off != field->list_head.node_offset) { + verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n", + list_node_off, field->list_head.node_offset, + btf_name_by_offset(field->list_head.btf, et->name_off)); + return -EINVAL; + } + /* Set arg#1 for expiration after unlock */ + return ref_set_release_on_unlock(env, reg->ref_obj_id); +} + +static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta) +{ + const char *func_name = meta->func_name, *ref_tname; + const struct btf *btf = meta->btf; + const struct btf_param *args; + u32 i, nargs; + int ret; + + args = (const struct btf_param *)(meta->func_proto + 1); + nargs = btf_type_vlen(meta->func_proto); + if (nargs > MAX_BPF_FUNC_REG_ARGS) { + verbose(env, "Function %s has %d > %d args\n", func_name, nargs, + MAX_BPF_FUNC_REG_ARGS); + return -EINVAL; + } + + /* Check that BTF function arguments match actual types that the + * verifier sees. + */ + for (i = 0; i < nargs; i++) { + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[i + 1]; + const struct btf_type *t, *ref_t, *resolve_ret; + enum bpf_arg_type arg_type = ARG_DONTCARE; + u32 regno = i + 1, ref_id, type_size; + bool is_ret_buf_sz = false; + int kf_arg_type; + + t = btf_type_skip_modifiers(btf, args[i].type, NULL); + + if (is_kfunc_arg_ignore(btf, &args[i])) + continue; + + if (btf_type_is_scalar(t)) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "R%d is not a scalar\n", regno); + return -EINVAL; + } + + if (is_kfunc_arg_constant(meta->btf, &args[i])) { + if (meta->arg_constant.found) { + verbose(env, "verifier internal error: only one constant argument permitted\n"); + return -EFAULT; + } + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d must be a known constant\n", regno); + return -EINVAL; + } + ret = mark_chain_precision(env, regno); + if (ret < 0) + return ret; + meta->arg_constant.found = true; + meta->arg_constant.value = reg->var_off.value; + } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) { + meta->r0_rdonly = true; + is_ret_buf_sz = true; + } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) { + is_ret_buf_sz = true; + } + + if (is_ret_buf_sz) { + if (meta->r0_size) { + verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); + return -EINVAL; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a const\n", regno); + return -EINVAL; + } + + meta->r0_size = reg->var_off.value; + ret = mark_chain_precision(env, regno); + if (ret) + return ret; + } + continue; + } + + if (!btf_type_is_ptr(t)) { + verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t)); + return -EINVAL; + } + + if (reg->ref_obj_id) { + if (is_kfunc_release(meta) && meta->ref_obj_id) { + verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, + meta->ref_obj_id); + return -EFAULT; + } + meta->ref_obj_id = reg->ref_obj_id; + if (is_kfunc_release(meta)) + meta->release_regno = regno; + } + + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + + kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs); + if (kf_arg_type < 0) + return kf_arg_type; + + switch (kf_arg_type) { + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + case KF_ARG_PTR_TO_BTF_ID: + if (!is_kfunc_trusted_args(meta)) + break; + + if (!is_trusted_reg(reg)) { + verbose(env, "R%d must be referenced or trusted\n", regno); + return -EINVAL; + } + fallthrough; + case KF_ARG_PTR_TO_CTX: + /* Trusted arguments have the same offset checks as release arguments */ + arg_type |= OBJ_RELEASE; + break; + case KF_ARG_PTR_TO_KPTR: + case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_LIST_HEAD: + case KF_ARG_PTR_TO_LIST_NODE: + case KF_ARG_PTR_TO_MEM: + case KF_ARG_PTR_TO_MEM_SIZE: + /* Trusted by default */ + break; + default: + WARN_ON_ONCE(1); + return -EFAULT; + } + + if (is_kfunc_release(meta) && reg->ref_obj_id) + arg_type |= OBJ_RELEASE; + ret = check_func_arg_reg_off(env, reg, regno, arg_type); + if (ret < 0) + return ret; + + switch (kf_arg_type) { + case KF_ARG_PTR_TO_CTX: + if (reg->type != PTR_TO_CTX) { + verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); + return -EINVAL; + } + + if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog)); + if (ret < 0) + return -EINVAL; + meta->ret_btf_id = ret; + } + break; + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + if (meta->btf == btf_vmlinux && + meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + meta->arg_obj_drop.btf = reg->btf; + meta->arg_obj_drop.btf_id = reg->btf_id; + } + break; + case KF_ARG_PTR_TO_KPTR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#0 expected pointer to map value\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_DYNPTR: + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d expected pointer to stack\n", i); + return -EINVAL; + } + + if (!is_dynptr_reg_valid_init(env, reg)) { + verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + + if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { + verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + break; + case KF_ARG_PTR_TO_LIST_HEAD: + if (reg->type != PTR_TO_MAP_VALUE && + reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); + return -EINVAL; + } + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_LIST_NODE: + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_BTF_ID: + /* Only base_type is checked, further checks are done here */ + if ((base_type(reg->type) != PTR_TO_BTF_ID || + bpf_type_has_unsafe_modifiers(reg->type)) && + !reg2btf_ids[base_type(reg->type)]) { + verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type)); + verbose(env, "expected %s or socket\n", + reg_type_str(env, base_type(reg->type) | + (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_MEM: + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); + if (IS_ERR(resolve_ret)) { + verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", + i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret)); + return -EINVAL; + } + ret = check_mem_reg(env, reg, regno, type_size); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_MEM_SIZE: + ret = check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1); + if (ret < 0) { + verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); + return ret; + } + /* Skip next '__sz' argument */ + i++; + break; + } + } + + if (is_kfunc_release(meta) && !meta->release_regno) { + verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", + func_name); + return -EINVAL; + } + + return 0; +} + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); - struct bpf_kfunc_arg_meta meta = { 0 }; const char *func_name, *ptr_type_name; + bool sleepable, rcu_lock, rcu_unlock; + struct bpf_kfunc_call_arg_meta meta; u32 i, nargs, func_id, ptr_type_id; int err, insn_idx = *insn_idx_p; const struct btf_param *args; + const struct btf_type *ret_t; struct btf *desc_btf; u32 *kfunc_flags; - bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) @@ -7830,24 +8905,68 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name); return -EACCES; } - if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) { - verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n"); + + /* Prepare kfunc call metadata */ + memset(&meta, 0, sizeof(meta)); + meta.btf = desc_btf; + meta.func_id = func_id; + meta.kfunc_flags = *kfunc_flags; + meta.func_proto = func_proto; + meta.func_name = func_name; + + if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { + verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); + return -EACCES; + } + + sleepable = is_kfunc_sleepable(&meta); + if (sleepable && !env->prog->aux->sleepable) { + verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); + return -EACCES; + } + + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); + rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); + if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) { + verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name); return -EACCES; } - acq = *kfunc_flags & KF_ACQUIRE; + if (env->cur_state->active_rcu_lock) { + struct bpf_func_state *state; + struct bpf_reg_state *reg; - meta.flags = *kfunc_flags; + if (rcu_lock) { + verbose(env, "nested rcu read lock (kernel function %s)\n", func_name); + return -EINVAL; + } else if (rcu_unlock) { + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->type & MEM_RCU) { + reg->type &= ~(MEM_RCU | PTR_TRUSTED); + reg->type |= PTR_UNTRUSTED; + } + })); + env->cur_state->active_rcu_lock = false; + } else if (sleepable) { + verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name); + return -EACCES; + } + } else if (rcu_lock) { + env->cur_state->active_rcu_lock = true; + } else if (rcu_unlock) { + verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name); + return -EINVAL; + } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, &meta); + err = check_kfunc_args(env, &meta); if (err < 0) return err; /* In case of release function, we get register number of refcounted - * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ - if (err) { - err = release_reference(env, regs[err].ref_obj_id); + if (meta.release_regno) { + err = release_reference(env, regs[meta.release_regno].ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", func_name, func_id); @@ -7861,18 +8980,92 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); - if (acq && !btf_type_is_struct_ptr(desc_btf, t)) { - verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); - return -EINVAL; + if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { + /* Only exception is bpf_obj_new_impl */ + if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } } if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(desc_btf, t->type, - &ptr_type_id); - if (!btf_type_is_struct(ptr_type)) { + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); + + if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { + if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + struct btf *ret_btf; + u32 ret_btf_id; + + if (unlikely(!bpf_global_ma_set)) + return -ENOMEM; + + if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { + verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); + return -EINVAL; + } + + ret_btf = env->prog->aux->btf; + ret_btf_id = meta.arg_constant.value; + + /* This may be NULL due to user not supplying a BTF */ + if (!ret_btf) { + verbose(env, "bpf_obj_new requires prog BTF\n"); + return -EINVAL; + } + + ret_t = btf_type_by_id(ret_btf, ret_btf_id); + if (!ret_t || !__btf_type_is_struct(ret_t)) { + verbose(env, "bpf_obj_new type ID argument must be of a struct\n"); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = ret_btf; + regs[BPF_REG_0].btf_id = ret_btf_id; + + env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; + env->insn_aux_data[insn_idx].kptr_struct_meta = + btf_find_struct_meta(ret_btf, ret_btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + env->insn_aux_data[insn_idx].kptr_struct_meta = + btf_find_struct_meta(meta.arg_obj_drop.btf, + meta.arg_obj_drop.btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || + meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { + struct btf_field *field = meta.arg_list_head.field; + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = field->list_head.btf; + regs[BPF_REG_0].btf_id = field->list_head.value_btf_id; + regs[BPF_REG_0].off = field->list_head.node_offset; + } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { + ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value); + if (!ret_t || !btf_type_is_struct(ret_t)) { + verbose(env, + "kfunc bpf_rdonly_cast type ID argument must be of a struct\n"); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf_id = meta.arg_constant.value; + } else { + verbose(env, "kernel function %s unhandled dynamic return type\n", + meta.func_name); + return -EFAULT; + } + } else if (!__btf_type_is_struct(ptr_type)) { if (!meta.r0_size) { ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); @@ -7900,20 +9093,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; } - if (*kfunc_flags & KF_RET_NULL) { + + if (is_kfunc_ret_null(&meta)) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ regs[BPF_REG_0].id = ++env->id_gen; } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); - if (acq) { + if (is_kfunc_acquire(&meta)) { int id = acquire_reference_state(env, insn_idx); if (id < 0) return id; - regs[BPF_REG_0].id = id; + if (is_kfunc_ret_null(&meta)) + regs[BPF_REG_0].id = id; regs[BPF_REG_0].ref_obj_id = id; } + if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) + regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); @@ -10086,16 +11283,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, { if (type_may_be_null(reg->type) && reg->id == id && !WARN_ON_ONCE(!reg->id)) { - if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || - !tnum_equals_const(reg->var_off, 0) || - reg->off)) { - /* Old offset (both fixed and variable parts) should - * have been known-zero, because we don't allow pointer - * arithmetic on pointers that might be NULL. If we - * see this happening, don't convert the register. - */ + /* Old offset (both fixed and variable parts) should have been + * known-zero, because we don't allow pointer arithmetic on + * pointers that might be NULL. If we see this happening, don't + * convert the register. + * + * But in some cases, some helpers that return local kptrs + * advance offset for the returned pointer. In those cases, it + * is fine to expect to see reg->off. + */ + if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0))) + return; + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off)) return; - } if (is_null) { reg->type = SCALAR_VALUE; /* We don't need id and ref_obj_id from this point @@ -10269,6 +11469,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_verifier_state *other_branch; struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; + struct bpf_reg_state *eq_branch_regs; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -10378,8 +11579,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, /* detect if we are comparing against a constant value so we can adjust * our min/max values for our dst register. * this is only legit if both are scalars (or pointers to the same - * object, I suppose, but we don't support that right now), because - * otherwise the different base pointers mean the offsets aren't + * object, I suppose, see the PTR_MAYBE_NULL related if block below), + * because otherwise the different base pointers mean the offsets aren't * comparable. */ if (BPF_SRC(insn->code) == BPF_X) { @@ -10428,6 +11629,36 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); } + /* if one pointer register is compared to another pointer + * register check if PTR_MAYBE_NULL could be lifted. + * E.g. register A - maybe null + * register B - not null + * for JNE A, B, ... - A is not null in the false branch; + * for JEQ A, B, ... - A is not null in the true branch. + */ + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && + __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && + type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) { + eq_branch_regs = NULL; + switch (opcode) { + case BPF_JEQ: + eq_branch_regs = other_branch_regs; + break; + case BPF_JNE: + eq_branch_regs = regs; + break; + default: + /* do nothing */ + break; + } + if (eq_branch_regs) { + if (type_may_be_null(src_reg->type)) + mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]); + else + mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]); + } + } + /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). * NOTE: these optimizations below are related with pointer comparison * which will never be JMP32. @@ -10534,8 +11765,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; - if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) - dst_reg->id = ++env->id_gen; + WARN_ON_ONCE(map->max_entries != 1); + /* We want reg->id to be same (0) as map_value is not distinct */ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || insn->src_reg == BPF_PSEUDO_MAP_IDX) { dst_reg->type = CONST_PTR_TO_MAP; @@ -10613,11 +11844,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - if (env->cur_state->active_spin_lock) { + if (env->cur_state->active_lock.ptr) { verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); return -EINVAL; } + if (env->cur_state->active_rcu_lock) { + verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n"); + return -EINVAL; + } + if (regs[ctx_reg].type != PTR_TO_CTX) { verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); @@ -11879,7 +13115,11 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; - if (old->active_spin_lock != cur->active_spin_lock) + if (old->active_lock.ptr != cur->active_lock.ptr || + old->active_lock.id != cur->active_lock.id) + return false; + + if (old->active_rcu_lock != cur->active_rcu_lock) return false; /* for states to be equal callsites have to be the same @@ -12524,11 +13764,14 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_spin_lock && - (insn->src_reg == BPF_PSEUDO_CALL || - insn->imm != BPF_FUNC_spin_unlock)) { - verbose(env, "function calls are not allowed while holding a lock\n"); - return -EINVAL; + if (env->cur_state->active_lock.ptr) { + if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || + (insn->src_reg == BPF_PSEUDO_CALL) || + (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && + (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) { + verbose(env, "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); @@ -12561,11 +13804,16 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_spin_lock) { + if (env->cur_state->active_lock.ptr) { verbose(env, "bpf_spin_unlock is missing\n"); return -EINVAL; } + if (env->cur_state->active_rcu_lock) { + verbose(env, "bpf_rcu_read_unlock is missing\n"); + return -EINVAL; + } + /* We must do check_reference_leak here before * prepare_func_exit to handle the case when * state->curframe > 0, it may be a callback @@ -12818,6 +14066,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, { enum bpf_prog_type prog_type = resolve_prog_type(prog); + if (btf_record_has_field(map->record, BPF_LIST_HEAD)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_list_head yet\n"); + return -EINVAL; + } + } + if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) { if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); @@ -13654,6 +14909,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) break; case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_UNTRUSTED: + /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike + * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot + * be said once it is marked PTR_UNTRUSTED, hence we must handle + * any faults for loads into such types. BPF_WRITE is disallowed + * for this case. + */ + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED: if (type == BPF_READ) { insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); @@ -14019,8 +15281,8 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } -static int fixup_kfunc_call(struct bpf_verifier_env *env, - struct bpf_insn *insn) +static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + struct bpf_insn *insn_buf, int insn_idx, int *cnt) { const struct bpf_kfunc_desc *desc; @@ -14039,8 +15301,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, return -EFAULT; } + *cnt = 0; insn->imm = desc->imm; - + if (insn->off) + return 0; + if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; + u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size; + + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size); + insn_buf[1] = addr[0]; + insn_buf[2] = addr[1]; + insn_buf[3] = *insn; + *cnt = 4; + } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; + + insn_buf[0] = addr[0]; + insn_buf[1] = addr[1]; + insn_buf[2] = *insn; + *cnt = 3; + } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || + desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { + insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); + *cnt = 1; + } return 0; } @@ -14182,9 +15469,19 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) continue; if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { - ret = fixup_kfunc_call(env, insn); + ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); if (ret) return ret; + if (cnt == 0) + continue; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; continue; } @@ -14302,14 +15599,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto patch_call_imm; } - if (insn->imm == BPF_FUNC_task_storage_get || - insn->imm == BPF_FUNC_sk_storage_get || - insn->imm == BPF_FUNC_inode_storage_get || - insn->imm == BPF_FUNC_cgrp_storage_get) { - if (env->prog->aux->sleepable) - insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); - else + if (is_storage_get_function(insn->imm)) { + if (!env->prog->aux->sleepable || + env->insn_aux_data[i + delta].storage_get_func_atomic) insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC); + else + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); insn_buf[1] = *insn; cnt = 2; @@ -14379,7 +15674,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, - (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, (int (*)(struct bpf_map *map, bpf_callback_t callback_fn, @@ -15388,6 +16683,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); + env->rcu_tag_supported = btf_vmlinux && + btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0; if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; |