diff options
author | Alexei Starovoitov <ast@kernel.org> | 2022-12-04 12:52:40 -0800 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2022-12-04 12:52:40 -0800 |
commit | 1910676cc1ec29fad850448ead0fffdb93fb74b5 (patch) | |
tree | ce785f382237fb42178ddb609ae78bfb334357bf | |
parent | 706819495921ddad6b3780140b9d9e9293b6dedc (diff) | |
parent | f53625649888c6ec9eeca151f802e905482c6698 (diff) |
Merge branch 'bpf: Handle MEM_RCU type properly'
Yonghong Song says:
====================
Patch set [1] added rcu support for bpf programs. In [1], a rcu
pointer is considered to be trusted and not null. This is actually
not true in some cases. The rcu pointer could be null, and for non-null
rcu pointer, it may have reference count of 0. This small patch set
fixed this problem. Patch 1 is the kernel fix. Patch 2 adjusted
selftests properly. Patch 3 added documentation for newly-introduced
KF_RCU flag.
[1] https://lore.kernel.org/all/20221124053201.2372298-1-yhs@fb.com/
Changelogs:
v1 -> v2:
- rcu ptr could be NULL.
- non_null_rcu_ptr->rcu_field can be marked as MEM_RCU as well.
- Adjust the code to avoid existing error message change.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | Documentation/bpf/kfuncs.rst | 9 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 2 | ||||
-rw-r--r-- | include/linux/btf.h | 1 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 14 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 45 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/rcu_read_lock.c | 55 |
6 files changed, 102 insertions, 24 deletions
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 90774479ab7a..b027fe16ee66 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -191,6 +191,15 @@ rebooting or panicking. Due to this additional restrictions apply to these calls. At the moment they only require CAP_SYS_BOOT capability, but more can be added later. +2.4.8 KF_RCU flag +----------------- + +The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument. +When used together with KF_ACQUIRE, it indicates the kfunc should have a +single argument which must be a trusted argument or a MEM_RCU pointer. +The argument may have reference count of 0 and the kfunc must take this +into consideration. + 2.5 Registering the kfuncs -------------------------- diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b5090e89cb3f..c07b351a5bc7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -682,7 +682,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } -#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED) +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) static inline bool bpf_type_has_unsafe_modifiers(u32 type) { diff --git a/include/linux/btf.h b/include/linux/btf.h index 9ed00077db6e..cbd6e4096f8c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -70,6 +70,7 @@ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ +#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ /* * Return the name of the passed struct, if exists, or halt the build if for diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a5a511430f2a..cca642358e80 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1838,6 +1838,19 @@ struct task_struct *bpf_task_acquire(struct task_struct *p) } /** + * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task + * acquired by this kfunc which is not stored in a map as a kptr, must be + * released by calling bpf_task_release(). + * @p: The task on which a reference is being acquired. + */ +struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) +{ + if (!refcount_inc_not_zero(&p->rcu_users)) + return NULL; + return p; +} + +/** * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task * kptr acquired by this kfunc which is not subsequently stored in a map, must * be released by calling bpf_task_release(). @@ -2013,6 +2026,7 @@ BTF_ID_FLAGS(func, bpf_list_push_back) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) #ifdef CONFIG_CGROUPS diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b0db9c10567b..66b82f52b5bc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4275,7 +4275,7 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg) return true; /* If a register is not referenced, it is trusted if it has the - * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the + * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the * other type modifiers may be safe, but we elect to take an opt-in * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are * not. @@ -4287,6 +4287,11 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg) !bpf_type_has_unsafe_modifiers(reg->type); } +static bool is_rcu_reg(const struct bpf_reg_state *reg) +{ + return reg->type & MEM_RCU; +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -4785,14 +4790,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (flag & MEM_RCU) { /* Mark value register as MEM_RCU only if it is protected by - * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU + * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU * itself can already indicate trustedness inside the rcu - * read lock region. Also mark it as PTR_TRUSTED. + * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since + * it could be null in some cases. */ - if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg)) + if (!env->cur_state->active_rcu_lock || + !(is_trusted_reg(reg) || is_rcu_reg(reg))) flag &= ~MEM_RCU; else - flag |= PTR_TRUSTED; + flag |= PTR_MAYBE_NULL; } else if (reg->type & MEM_RCU) { /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively. @@ -5957,7 +5964,7 @@ static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID, PTR_TO_BTF_ID | PTR_TRUSTED, - PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED, + PTR_TO_BTF_ID | MEM_RCU, }, }; static const struct bpf_reg_types percpu_btf_ptr_types = { @@ -6136,7 +6143,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | MEM_ALLOC: case PTR_TO_BTF_ID | PTR_TRUSTED: - case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_RCU: case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: /* When referenced PTR_TO_BTF_ID is passed to release function, * it's fixed offset must be 0. In the other cases, fixed offset @@ -8038,6 +8045,11 @@ static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_DESTRUCTIVE; } +static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RCU; +} + static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg) { return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET); @@ -8722,13 +8734,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ switch (kf_arg_type) { case KF_ARG_PTR_TO_ALLOC_BTF_ID: case KF_ARG_PTR_TO_BTF_ID: - if (!is_kfunc_trusted_args(meta)) + if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta)) break; if (!is_trusted_reg(reg)) { - verbose(env, "R%d must be referenced or trusted\n", regno); - return -EINVAL; + if (!is_kfunc_rcu(meta)) { + verbose(env, "R%d must be referenced or trusted\n", regno); + return -EINVAL; + } + if (!is_rcu_reg(reg)) { + verbose(env, "R%d must be a rcu pointer\n", regno); + return -EINVAL; + } } + fallthrough; case KF_ARG_PTR_TO_CTX: /* Trusted arguments have the same offset checks as release arguments */ @@ -8839,7 +8858,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_BTF_ID: /* Only base_type is checked, further checks are done here */ if ((base_type(reg->type) != PTR_TO_BTF_ID || - bpf_type_has_unsafe_modifiers(reg->type)) && + (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) && !reg2btf_ids[base_type(reg->type)]) { verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type)); verbose(env, "expected %s or socket\n", @@ -8954,7 +8973,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } else if (rcu_unlock) { bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg->type & MEM_RCU) { - reg->type &= ~(MEM_RCU | PTR_TRUSTED); + reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); reg->type |= PTR_UNTRUSTED; } })); @@ -11294,7 +11313,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, bool is_null) { if (type_may_be_null(reg->type) && reg->id == id && - !WARN_ON_ONCE(!reg->id)) { + (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) { /* Old offset (both fixed and variable parts) should have been * known-zero, because we don't allow pointer arithmetic on * pointers that might be NULL. If we see this happening, don't diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 94a970076b98..cf06a34fcb02 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -23,13 +23,14 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; -struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym; void bpf_task_release(struct task_struct *p) __ksym; SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int get_cgroup_id(void *ctx) { struct task_struct *task; + struct css_set *cgroups; task = bpf_get_current_task_btf(); if (task->pid != target_pid) @@ -37,7 +38,11 @@ int get_cgroup_id(void *ctx) /* simulate bpf_get_current_cgroup_id() helper */ bpf_rcu_read_lock(); - cgroup_id = task->cgroups->dfl_cgrp->kn->id; + cgroups = task->cgroups; + if (!cgroups) + goto unlock; + cgroup_id = cgroups->dfl_cgrp->kn->id; +unlock: bpf_rcu_read_unlock(); return 0; } @@ -56,6 +61,8 @@ int task_succ(void *ctx) bpf_rcu_read_lock(); /* region including helper using rcu ptr real_parent */ real_parent = task->real_parent; + if (!real_parent) + goto out; ptr = bpf_task_storage_get(&map_a, real_parent, &init_val, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) @@ -92,7 +99,10 @@ int two_regions(void *ctx) bpf_rcu_read_unlock(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -105,7 +115,10 @@ int non_sleepable_1(void *ctx) task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -121,7 +134,10 @@ int non_sleepable_2(void *ctx) bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -129,16 +145,28 @@ int non_sleepable_2(void *ctx) SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") int task_acquire(void *ctx) { - struct task_struct *task, *real_parent; + struct task_struct *task, *real_parent, *gparent; task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; + + /* rcu_ptr->rcu_field */ + gparent = real_parent->real_parent; + if (!gparent) + goto out; + /* acquire a reference which can be used outside rcu read lock region */ - real_parent = bpf_task_acquire(real_parent); + gparent = bpf_task_acquire_not_zero(gparent); + if (!gparent) + goto out; + + (void)bpf_task_storage_get(&map_a, gparent, 0, 0); + bpf_task_release(gparent); +out: bpf_rcu_read_unlock(); - (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); - bpf_task_release(real_parent); return 0; } @@ -181,9 +209,12 @@ int non_sleepable_rcu_mismatch(void *ctx) /* non-sleepable: missing bpf_rcu_read_unlock() in one path */ bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); if (real_parent) bpf_rcu_read_unlock(); +out: return 0; } @@ -199,16 +230,17 @@ int inproper_sleepable_helper(void *ctx) /* sleepable helper in rcu read lock region */ bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; regs = (struct pt_regs *)bpf_task_pt_regs(real_parent); - if (!regs) { - bpf_rcu_read_unlock(); - return 0; - } + if (!regs) + goto out; ptr = (void *)PT_REGS_IP(regs); (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0); user_data = value; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); return 0; } @@ -239,7 +271,10 @@ int nested_rcu_region(void *ctx) bpf_rcu_read_lock(); bpf_rcu_read_lock(); real_parent = task->real_parent; + if (!real_parent) + goto out; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: bpf_rcu_read_unlock(); bpf_rcu_read_unlock(); return 0; |