Diffstat (limited to 'tools/lib/bpf/libbpf.c')
-rw-r--r-- | tools/lib/bpf/libbpf.c | 1273 |
1 files changed, 1102 insertions, 171 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 809fe209cdcc..e89cc9c885b3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -302,7 +302,7 @@ struct bpf_program {
 	void *priv;
 	bpf_program_clear_priv_t clear_priv;
 
-	bool load;
+	bool autoload;
 	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
@@ -357,6 +357,7 @@ enum libbpf_map_type {
 };
 
 struct bpf_map {
+	struct bpf_object *obj;
 	char *name;
 	/* real_name is defined for special internal maps (.rodata*,
 	 * .data*, .bss, .kconfig) and preserves their original ELF section
@@ -386,7 +387,7 @@ struct bpf_map {
 	char *pin_path;
 	bool pinned;
 	bool reused;
-	bool skipped;
+	bool autocreate;
 	__u64 map_extra;
 };
 
@@ -483,6 +484,8 @@ struct elf_state {
 	int st_ops_shndx;
 };
 
+struct usdt_manager;
+
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
@@ -545,6 +548,8 @@ struct bpf_object {
 	size_t fd_array_cap;
 	size_t fd_array_cnt;
 
+	struct usdt_manager *usdt_man;
+
 	char path[];
 };
 
@@ -668,7 +673,18 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	prog->insns_cnt = prog->sec_insn_cnt;
 
 	prog->type = BPF_PROG_TYPE_UNSPEC;
-	prog->load = true;
+
+	/* libbpf's convention for SEC("?abc...") is that it's just like
+	 * SEC("abc...") but the corresponding bpf_program starts out with
+	 * autoload set to false.
+	 */
+	if (sec_name[0] == '?') {
+		prog->autoload = false;
+		/* from now on forget there was ? in section name */
+		sec_name++;
+	} else {
+		prog->autoload = true;
+	}
 
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
@@ -1218,10 +1234,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
 	if (!obj->efile.elf)
 		return;
 
-	if (obj->efile.elf) {
-		elf_end(obj->efile.elf);
-		obj->efile.elf = NULL;
-	}
+	elf_end(obj->efile.elf);
+	obj->efile.elf = NULL;
 
 	obj->efile.symbols = NULL;
 	obj->efile.st_ops_data = NULL;
@@ -1397,8 +1411,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
 	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
 		Elf64_Sym *sym = elf_sym_by_idx(obj, si);
 
-		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
-		    ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+			continue;
+
+		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
 			continue;
 
 		sname = elf_sym_str(obj, sym->st_name);
@@ -1417,36 +1434,21 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
 
 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 {
-	struct bpf_map *new_maps;
-	size_t new_cap;
-	int i;
-
-	if (obj->nr_maps < obj->maps_cap)
-		return &obj->maps[obj->nr_maps++];
-
-	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
-	new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
-	if (!new_maps) {
-		pr_warn("alloc maps for object failed\n");
-		return ERR_PTR(-ENOMEM);
-	}
+	struct bpf_map *map;
+	int err;
 
-	obj->maps_cap = new_cap;
-	obj->maps = new_maps;
+	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
+				sizeof(*obj->maps), obj->nr_maps + 1);
+	if (err)
+		return ERR_PTR(err);
 
-	/* zero out new maps */
-	memset(obj->maps + obj->nr_maps, 0,
-	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
-	/*
-	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
-	 * when failure (zclose won't close negative fd)).
-	 */
-	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
-		obj->maps[i].fd = -1;
-		obj->maps[i].inner_map_fd = -1;
-	}
+	map = &obj->maps[obj->nr_maps++];
+	map->obj = obj;
+	map->fd = -1;
+	map->inner_map_fd = -1;
+	map->autocreate = true;
 
-	return &obj->maps[obj->nr_maps++];
+	return map;
 }
 
 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
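The SEC("?...") convention added in bpf_object__init_prog() above lets a program be compiled in but excluded from loading by default. A minimal BPF-side sketch, assuming the usual vmlinux.h/bpf_tracing.h setup (program and target names are made up for illustration):

	/* prog.bpf.c */
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	/* leading '?' => prog->autoload starts out false; the object
	 * still loads fine with this program skipped
	 */
	SEC("?kprobe/do_unlinkat")
	int BPF_KPROBE(maybe_probe)
	{
		return 0;
	}

Userspace can opt back in with bpf_program__set_autoload(prog, true) any time before bpf_object__load().
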
@@ -2749,6 +2751,9 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 		btf__set_pointer_size(obj->btf, 8);
 	}
 	if (btf_ext_data) {
+		struct btf_ext_info *ext_segs[3];
+		int seg_num, sec_num;
+
 		if (!obj->btf) {
 			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
 				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
@@ -2762,6 +2767,43 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 			obj->btf_ext = NULL;
 			goto out;
 		}
+
+		/* setup .BTF.ext to ELF section mapping */
+		ext_segs[0] = &obj->btf_ext->func_info;
+		ext_segs[1] = &obj->btf_ext->line_info;
+		ext_segs[2] = &obj->btf_ext->core_relo_info;
+		for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
+			struct btf_ext_info *seg = ext_segs[seg_num];
+			const struct btf_ext_info_sec *sec;
+			const char *sec_name;
+			Elf_Scn *scn;
+
+			if (seg->sec_cnt == 0)
+				continue;
+
+			seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
+			if (!seg->sec_idxs) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			sec_num = 0;
+			for_each_btf_ext_sec(seg, sec) {
+				/* preventively increment index to avoid doing
+				 * this before every continue below
+				 */
+				sec_num++;
+
+				sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+				if (str_is_empty(sec_name))
+					continue;
+				scn = elf_sec_by_name(obj, sec_name);
+				if (!scn)
+					continue;
+
+				seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
+			}
+		}
 	}
 out:
 	if (err && libbpf_needs_btf(obj)) {
@@ -2920,7 +2962,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 	}
 
 	bpf_object__for_each_program(prog, obj) {
-		if (!prog->load)
+		if (!prog->autoload)
 			continue;
 		if (prog_needs_vmlinux_btf(prog))
 			return true;
@@ -4268,6 +4310,20 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
 	return 0;
 }
 
+bool bpf_map__autocreate(const struct bpf_map *map)
+{
+	return map->autocreate;
+}
+
+int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
+{
+	if (map->obj->loaded)
+		return libbpf_err(-EBUSY);
+
+	map->autocreate = autocreate;
+	return 0;
+}
+
 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 {
 	struct bpf_map_info info = {};
@@ -4587,7 +4643,7 @@ static int probe_kern_probe_read_kernel(void)
 	};
 	int fd, insn_cnt = ARRAY_SIZE(insns);
 
-	fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+	fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
 	return probe_fd(fd);
 }
 
@@ -4678,6 +4734,18 @@ static int probe_perf_link(void)
 	return link_fd < 0 && err == -EBADF;
 }
 
+static int probe_kern_bpf_cookie(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+		BPF_EXIT_INSN(),
+	};
+	int ret, insn_cnt = ARRAY_SIZE(insns);
+
+	ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+	return probe_fd(ret);
+}
+
 enum kern_feature_result {
 	FEAT_UNKNOWN = 0,
 	FEAT_SUPPORTED = 1,
@@ -4740,6 +4808,9 @@ static struct kern_feature_desc {
 	[FEAT_MEMCG_ACCOUNT] = {
 		"memcg-based memory accounting", probe_memcg_account,
 	},
+	[FEAT_BPF_COOKIE] = {
+		"BPF cookie support", probe_kern_bpf_cookie,
+	},
 };
 
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
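bpf_map__set_autocreate() is the map-side counterpart of autoload: it must be called between open and load (it returns -EBUSY afterwards). A hedged userspace sketch (object and map names hypothetical, error handling elided):

	struct bpf_object *obj = bpf_object__open("prog.bpf.o");
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "huge_map");

	/* don't create this map on this host; programs still referencing
	 * it get a poisoned ldimm64 (see poison_map_ldimm64() further down)
	 */
	bpf_map__set_autocreate(map, false);
	bpf_object__load(obj);
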
@@ -4872,6 +4943,42 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 
 static void bpf_map__destroy(struct bpf_map *map);
 
+static bool is_pow_of_2(size_t x)
+{
+	return x && (x & (x - 1)) == 0;
+}
+
+static size_t adjust_ringbuf_sz(size_t sz)
+{
+	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
+	__u32 mul;
+
+	/* if user forgot to set any size, make sure they see error */
+	if (sz == 0)
+		return 0;
+	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
+	 * a power-of-2 multiple of kernel's page size. If user diligently
+	 * satisfied these conditions, pass the size through.
+	 */
+	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
+		return sz;
+
+	/* Otherwise find closest (page_sz * power_of_2) product bigger than
+	 * user-set size to satisfy both user size request and kernel
+	 * requirements and substitute correct max_entries for map creation.
+	 */
+	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
+		if (mul * page_sz > sz)
+			return mul * page_sz;
+	}
+
+	/* if it's impossible to satisfy the conditions (i.e., user size is
+	 * very close to UINT_MAX but is not a power-of-2 multiple of
+	 * page_size) then just return original size and let kernel reject it
+	 */
+	return sz;
+}
+
 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
@@ -4910,6 +5017,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 	}
 
 	switch (def->type) {
+	case BPF_MAP_TYPE_RINGBUF:
+		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
+		/* fallthrough */
 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
 	case BPF_MAP_TYPE_STACK_TRACE:
@@ -4923,7 +5033,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_QUEUE:
 	case BPF_MAP_TYPE_STACK:
-	case BPF_MAP_TYPE_RINGBUF:
 		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
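With a 4096-byte page size, adjust_ringbuf_sz() passes through any 4096 * 2^n value and rounds everything else up to the next such product, so a requested max_entries of 1000000 is created as 1048576 (4096 * 256). A hedged userspace sketch (map name hypothetical):

	struct bpf_map *rb = bpf_object__find_map_by_name(obj, "rb");

	/* 1000000 is not a power-of-2 multiple of the page size ... */
	bpf_map__set_max_entries(rb, 1000000);
	/* ... so the map is actually created with max_entries == 1048576 */
	bpf_object__load(obj);
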
@@ -5109,9 +5218,11 @@ bpf_object__create_maps(struct bpf_object *obj)
 		 * bpf_object loading will succeed just fine even on old
 		 * kernels.
 		 */
-		if (bpf_map__is_internal(map) &&
-		    !kernel_supports(obj, FEAT_GLOBAL_DATA)) {
-			map->skipped = true;
+		if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
+			map->autocreate = false;
+
+		if (!map->autocreate) {
+			pr_debug("map '%s': skipped auto-creating...\n", map->name);
 			continue;
 		}
 
@@ -5555,6 +5666,22 @@ static int record_relo_core(struct bpf_program *prog,
 	return 0;
 }
 
+static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
+{
+	struct reloc_desc *relo;
+	int i;
+
+	for (i = 0; i < prog->nr_reloc; i++) {
+		relo = &prog->reloc_desc[i];
+		if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
+			continue;
+
+		return relo->core_relo;
+	}
+
+	return NULL;
+}
+
 static int bpf_core_resolve_relo(struct bpf_program *prog,
 				 const struct bpf_core_relo *relo,
 				 int relo_idx,
@@ -5611,7 +5738,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 	struct bpf_program *prog;
 	struct bpf_insn *insn;
 	const char *sec_name;
-	int i, err = 0, insn_idx, sec_idx;
+	int i, err = 0, insn_idx, sec_idx, sec_num;
 
 	if (obj->btf_ext->core_relo_info.len == 0)
 		return 0;
@@ -5632,32 +5759,18 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 	}
 
 	seg = &obj->btf_ext->core_relo_info;
+	sec_num = 0;
 	for_each_btf_ext_sec(seg, sec) {
+		sec_idx = seg->sec_idxs[sec_num];
+		sec_num++;
+
 		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 		if (str_is_empty(sec_name)) {
 			err = -EINVAL;
 			goto out;
 		}
 
-		/* bpf_object's ELF is gone by now so it's not easy to find
-		 * section index by section name, but we can find *any*
-		 * bpf_program within desired section name and use it's
-		 * prog->sec_idx to do a proper search by section index and
-		 * instruction offset
-		 */
-		prog = NULL;
-		for (i = 0; i < obj->nr_programs; i++) {
-			prog = &obj->programs[i];
-			if (strcmp(prog->sec_name, sec_name) == 0)
-				break;
-		}
-		if (!prog) {
-			pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
-			return -ENOENT;
-		}
-		sec_idx = prog->sec_idx;
-
-		pr_debug("sec '%s': found %d CO-RE relocations\n",
-			 sec_name, sec->num_info);
+		pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
 
 		for_each_btf_ext_rec(seg, sec, i, rec) {
 			if (rec->insn_off % BPF_INSN_SZ)
@@ -5665,15 +5778,22 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 			insn_idx = rec->insn_off / BPF_INSN_SZ;
 			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 			if (!prog) {
-				pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
-					sec_name, insn_idx, i);
-				err = -EINVAL;
-				goto out;
+				/* When __weak subprog is "overridden" by another instance
+				 * of the subprog from a different object file, linker still
+				 * appends all the .BTF.ext info that used to belong to that
+				 * eliminated subprogram.
+				 * This is similar to what x86-64 linker does for relocations.
+				 * So just ignore such relocations just like we ignore
+				 * subprog instructions when discovering subprograms.
+				 */
+				pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
+					 sec_name, i, insn_idx);
+				continue;
 			}
 			/* no need to apply CO-RE relocation if the program is
 			 * not going to be loaded
 			 */
-			if (!prog->load)
+			if (!prog->autoload)
 				continue;
 
 			/* adjust insn_idx from section frame of reference to the local
@@ -5685,16 +5805,16 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 				return -EINVAL;
 			insn = &prog->insns[insn_idx];
 
-			if (prog->obj->gen_loader) {
-				err = record_relo_core(prog, rec, insn_idx);
-				if (err) {
-					pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
-						prog->name, i, err);
-					goto out;
-				}
-				continue;
+			err = record_relo_core(prog, rec, insn_idx);
+			if (err) {
+				pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
+					prog->name, i, err);
+				goto out;
 			}
 
+			if (prog->obj->gen_loader)
+				continue;
+
 			err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
 			if (err) {
 				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
@@ -5725,6 +5845,36 @@ out:
 	return err;
 }
 
+/* base map load ldimm64 special constant, used also for log fixup logic */
+#define MAP_LDIMM64_POISON_BASE 2001000000
+#define MAP_LDIMM64_POISON_PFX "200100"
+
+static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
+			       int insn_idx, struct bpf_insn *insn,
+			       int map_idx, const struct bpf_map *map)
+{
+	int i;
+
+	pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
+		 prog->name, relo_idx, insn_idx, map_idx, map->name);
+
+	/* we turn single ldimm64 into two identical invalid calls */
+	for (i = 0; i < 2; i++) {
+		insn->code = BPF_JMP | BPF_CALL;
+		insn->dst_reg = 0;
+		insn->src_reg = 0;
+		insn->off = 0;
+		/* if this instruction is reachable (not a dead code),
+		 * verifier will complain with something like:
+		 * invalid func unknown#2001000123
+		 * where lower 123 is map index into obj->maps[] array
+		 */
+		insn->imm = MAP_LDIMM64_POISON_BASE + map_idx;
+
+		insn++;
+	}
+}
+
 /* Relocate data references within program code:
  *  - map references;
  *  - global variable references;
@@ -5738,33 +5888,35 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 	for (i = 0; i < prog->nr_reloc; i++) {
 		struct reloc_desc *relo = &prog->reloc_desc[i];
 		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+		const struct bpf_map *map;
 		struct extern_desc *ext;
 
 		switch (relo->type) {
 		case RELO_LD64:
+			map = &obj->maps[relo->map_idx];
 			if (obj->gen_loader) {
 				insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
 				insn[0].imm = relo->map_idx;
-			} else {
+			} else if (map->autocreate) {
 				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
-				insn[0].imm = obj->maps[relo->map_idx].fd;
+				insn[0].imm = map->fd;
+			} else {
+				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
+						   relo->map_idx, map);
 			}
 			break;
 		case RELO_DATA:
+			map = &obj->maps[relo->map_idx];
 			insn[1].imm = insn[0].imm + relo->sym_off;
 			if (obj->gen_loader) {
 				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 				insn[0].imm = relo->map_idx;
-			} else {
-				const struct bpf_map *map = &obj->maps[relo->map_idx];
-
-				if (map->skipped) {
-					pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n",
-						prog->name, i);
-					return -ENOTSUP;
-				}
+			} else if (map->autocreate) {
 				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-				insn[0].imm = obj->maps[relo->map_idx].fd;
+				insn[0].imm = map->fd;
+			} else {
+				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
+						   relo->map_idx, map);
 			}
 			break;
 		case RELO_EXTERN_VAR:
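The effect of the poisoning above: only programs that actually execute an ldimm64 referring to a not-created map fail verification, instead of the old behavior of failing the whole object load with -ENOTSUP. Assuming a skipped map at index 3, the raw verifier log for a reachable reference would contain, per the comment above, something like:

	invalid func unknown#2001000003

which the log fixup machinery added further down rewrites into a message that names the map.
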
@@ -5834,14 +5986,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
 	void *rec, *rec_end, *new_prog_info;
 	const struct btf_ext_info_sec *sec;
 	size_t old_sz, new_sz;
-	const char *sec_name;
-	int i, off_adj;
+	int i, sec_num, sec_idx, off_adj;
 
+	sec_num = 0;
 	for_each_btf_ext_sec(ext_info, sec) {
-		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
-		if (!sec_name)
-			return -EINVAL;
-		if (strcmp(sec_name, prog->sec_name) != 0)
+		sec_idx = ext_info->sec_idxs[sec_num];
+		sec_num++;
+		if (prog->sec_idx != sec_idx)
 			continue;
 
 		for_each_btf_ext_rec(ext_info, sec, i, rec) {
@@ -6236,7 +6387,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 
 	if (err)
 		return err;
 
-
 	return 0;
 }
@@ -6297,8 +6447,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 				err);
 			return err;
 		}
-		if (obj->gen_loader)
-			bpf_object__sort_relos(obj);
+		bpf_object__sort_relos(obj);
 	}
 
 	/* Before relocating calls pre-process relocations and mark
@@ -6334,7 +6483,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 		 */
 		if (prog_is_subprog(obj, prog))
 			continue;
-		if (!prog->load)
+		if (!prog->autoload)
 			continue;
 
 		err = bpf_object__relocate_calls(obj, prog);
@@ -6349,7 +6498,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 		prog = &obj->programs[i];
 		if (prog_is_subprog(obj, prog))
 			continue;
-		if (!prog->load)
+		if (!prog->autoload)
 			continue;
 		err = bpf_object__relocate_data(obj, prog);
 		if (err) {
@@ -6358,8 +6507,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 			return err;
 		}
 	}
-	if (!obj->gen_loader)
-		bpf_object__free_relocs(obj);
+
 	return 0;
 }
@@ -6606,17 +6754,32 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
 	if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
 		opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
 
-	if (def & SEC_DEPRECATED)
+	if (def & SEC_DEPRECATED) {
 		pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n",
 			prog->sec_name);
+	}
 
-	if ((prog->type == BPF_PROG_TYPE_TRACING ||
-	     prog->type == BPF_PROG_TYPE_LSM ||
-	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
+	if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
 		int btf_obj_fd = 0, btf_type_id = 0, err;
 		const char *attach_name;
 
-		attach_name = strchr(prog->sec_name, '/') + 1;
+		attach_name = strchr(prog->sec_name, '/');
+		if (!attach_name) {
+			/* if BPF program is annotated with just SEC("fentry")
+			 * (or similar) without declaratively specifying
+			 * target, then it is expected that target will be
+			 * specified with bpf_program__set_attach_target() at
+			 * runtime before BPF object load step. If not, then
+			 * there is nothing to load into the kernel as BPF
+			 * verifier won't be able to validate BPF program
+			 * correctness anyways.
+			 */
+			pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
+				prog->name);
+			return -EINVAL;
+		}
+		attach_name++; /* skip over / */
+
 		err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
 		if (err)
 			return err;
@@ -6636,6 +6799,8 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
 	return 0;
 }
 
+static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
+
 static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog,
 					 struct bpf_insn *insns, int insns_cnt,
 					 const char *license, __u32 kern_version,
@@ -6695,6 +6860,8 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog
 				prog->name, err);
 			return err;
 		}
+		insns = prog->insns;
+		insns_cnt = prog->insns_cnt;
 	}
 
 	if (obj->gen_loader) {
@@ -6706,7 +6873,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog
 	}
 
 retry_load:
-	/* if log_level is zero, we don't request logs initiallly even if
+	/* if log_level is zero, we don't request logs initially even if
 	 * custom log_buf is specified; if the program load fails, then we'll
 	 * bump log_level to 1 and use either custom log_buf or we'll allocate
 	 * our own and retry the load to get details on what failed
@@ -6782,6 +6949,10 @@ retry_load:
 			goto retry_load;
 
 		ret = -errno;
+
+		/* post-process verifier log to improve error descriptions */
+		fixup_verifier_log(prog, log_buf, log_buf_size);
+
 		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 		pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
 		pr_perm_msg(ret);
@@ -6790,10 +6961,6 @@ retry_load:
 		pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
 			prog->name, log_buf);
 	}
-	if (insns_cnt >= BPF_MAXINSNS) {
-		pr_warn("prog '%s': program too large (%d insns), at most %d insns\n",
-			prog->name, insns_cnt, BPF_MAXINSNS);
-	}
 
 out:
 	if (own_log_buf)
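The SEC_ATTACH_BTF change above allows target-less declarations such as SEC("fentry"), as long as the target is supplied before load. A hedged userspace sketch (program and kernel function names are illustrative):

	struct bpf_program *prog;
	int err;

	prog = bpf_object__find_program_by_name(obj, "handle_entry");
	err = bpf_program__set_attach_target(prog, 0 /* vmlinux */, "do_unlinkat");
	if (err)
		return err;
	err = bpf_object__load(obj);
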
+ */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. + */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err, spec_len; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" are map index in obj->maps to fetch map name. 
+ */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= MAP_LDIMM64_POISON_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid BPF map reference>\n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + /* failed CO-RE relocation case */ + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; @@ -6946,7 +7276,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) { + if (!prog->autoload) { pr_debug("prog '%s': skipped loading\n", prog->name); continue; } @@ -6955,8 +7285,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - if (obj->gen_loader) - bpf_object__free_relocs(obj); + + bpf_object__free_relocs(obj); return 0; } @@ -6976,8 +7306,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -7985,7 +8315,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { @@ -8200,6 +8530,9 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); @@ -8423,7 +8756,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) @@ -8431,7 +8764,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) if (prog->obj->loaded) return libbpf_err(-EINVAL); - prog->load = autoload; + 
@@ -8431,7 +8764,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 	if (prog->obj->loaded)
 		return libbpf_err(-EINVAL);
 
-	prog->load = autoload;
+	prog->autoload = autoload;
 	return 0;
 }
@@ -8457,6 +8790,26 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog)
 	return prog->insns_cnt;
 }
 
+int bpf_program__set_insns(struct bpf_program *prog,
+			   struct bpf_insn *new_insns, size_t new_insn_cnt)
+{
+	struct bpf_insn *insns;
+
+	if (prog->obj->loaded)
+		return -EBUSY;
+
+	insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
+	if (!insns) {
+		pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
+		return -ENOMEM;
+	}
+	memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
+
+	prog->insns = insns;
+	prog->insns_cnt = new_insn_cnt;
+	return 0;
+}
+
 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
 			  bpf_program_prep_t prep)
 {
@@ -8519,9 +8872,13 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
 	return prog->type;
 }
 
-void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 {
+	if (prog->obj->loaded)
+		return libbpf_err(-EBUSY);
+
 	prog->type = type;
+	return 0;
 }
 
 static bool bpf_program__is_type(const struct bpf_program *prog,
@@ -8535,8 +8892,7 @@ int bpf_program__set_##NAME(struct bpf_program *prog)		\
 {									\
 	if (!prog)							\
 		return libbpf_err(-EINVAL);				\
-	bpf_program__set_type(prog, TYPE);				\
-	return 0;							\
+	return bpf_program__set_type(prog, TYPE);			\
 }									\
 									\
 bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
@@ -8566,10 +8922,14 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program
 	return prog->expected_attach_type;
 }
 
-void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+int bpf_program__set_expected_attach_type(struct bpf_program *prog,
 					   enum bpf_attach_type type)
 {
+	if (prog->obj->loaded)
+		return libbpf_err(-EBUSY);
+
 	prog->expected_attach_type = type;
+	return 0;
 }
 
 __u32 bpf_program__flags(const struct bpf_program *prog)
@@ -8630,6 +8990,8 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
 }
 
 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
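Since bpf_program__set_type() and bpf_program__set_expected_attach_type() now return an error (-EBUSY once the object is loaded), callers are expected to check the result; likewise, bpf_program__set_insns() may swap the instruction array only between open and load. A hedged sketch:

	err = bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
	if (err)	/* -EBUSY if bpf_object__load() already ran */
		return err;
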
@@ -8641,33 +9003,34 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX),
 	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
 	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("kprobe/",		KPROBE,	0, SEC_NONE, attach_kprobe),
-	SEC_DEF("uprobe/",		KPROBE,	0, SEC_NONE),
-	SEC_DEF("kretprobe/",		KPROBE, 0, SEC_NONE, attach_kprobe),
-	SEC_DEF("uretprobe/",		KPROBE, 0, SEC_NONE),
-	SEC_DEF("kprobe.multi/",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
-	SEC_DEF("kretprobe.multi/",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+	SEC_DEF("kprobe+",		KPROBE,	0, SEC_NONE, attach_kprobe),
+	SEC_DEF("uprobe+",		KPROBE,	0, SEC_NONE, attach_uprobe),
+	SEC_DEF("kretprobe+",		KPROBE, 0, SEC_NONE, attach_kprobe),
+	SEC_DEF("uretprobe+",		KPROBE, 0, SEC_NONE, attach_uprobe),
+	SEC_DEF("kprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+	SEC_DEF("kretprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+	SEC_DEF("usdt+",		KPROBE,	0, SEC_NONE, attach_usdt),
 	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE),
 	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
 	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("tracepoint/",		TRACEPOINT, 0, SEC_NONE, attach_tp),
-	SEC_DEF("tp/",			TRACEPOINT, 0, SEC_NONE, attach_tp),
-	SEC_DEF("raw_tracepoint/",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
-	SEC_DEF("raw_tp/",		RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
-	SEC_DEF("raw_tracepoint.w/",	RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
-	SEC_DEF("raw_tp.w/",		RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
-	SEC_DEF("tp_btf/",		TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
-	SEC_DEF("fentry/",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
-	SEC_DEF("fmod_ret/",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
-	SEC_DEF("fexit/",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
-	SEC_DEF("fentry.s/",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
-	SEC_DEF("fmod_ret.s/",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
-	SEC_DEF("fexit.s/",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
-	SEC_DEF("freplace/",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
-	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
-	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
-	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
-	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
+	SEC_DEF("tracepoint+",		TRACEPOINT, 0, SEC_NONE, attach_tp),
+	SEC_DEF("tp+",			TRACEPOINT, 0, SEC_NONE, attach_tp),
+	SEC_DEF("raw_tracepoint+",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
+	SEC_DEF("raw_tp+",		RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
+	SEC_DEF("raw_tracepoint.w+",	RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
+	SEC_DEF("raw_tp.w+",		RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
+	SEC_DEF("tp_btf+",		TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
+	SEC_DEF("fentry+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
+	SEC_DEF("fmod_ret+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
+	SEC_DEF("fexit+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
+	SEC_DEF("fentry.s+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+	SEC_DEF("fmod_ret.s+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+	SEC_DEF("fexit.s+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+	SEC_DEF("freplace+",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
+	SEC_DEF("lsm+",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
+	SEC_DEF("lsm.s+",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+	SEC_DEF("iter+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
+	SEC_DEF("iter.s+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp.frags/devmap",	XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
 	SEC_DEF("xdp/devmap",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
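In SEC_DEF names, the trailing '+' marks definitions that now accept either the bare prefix or the prefix plus a '/'-separated target (the old trailing '/' required a target). Both of the following are accepted, but only the second is auto-attachable (function name assumed for illustration):

	SEC("kprobe")			/* valid; attach manually at runtime */
	int BPF_KPROBE(probe_manual) { return 0; }

	SEC("kprobe/do_unlinkat")	/* valid; auto-attached by skeleton */
	int BPF_KPROBE(probe_auto) { return 0; }
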
@@ -9586,6 +9949,110 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 	return libbpf_err_ptr(-ENOTSUP);
 }
 
+static int validate_map_op(const struct bpf_map *map, size_t key_sz,
+			   size_t value_sz, bool check_value_sz)
+{
+	if (map->fd <= 0)
+		return -ENOENT;
+
+	if (map->def.key_size != key_sz) {
+		pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
+			map->name, key_sz, map->def.key_size);
+		return -EINVAL;
+	}
+
+	if (!check_value_sz)
+		return 0;
+
+	switch (map->def.type) {
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
+		int num_cpu = libbpf_num_possible_cpus();
+		size_t elem_sz = roundup(map->def.value_size, 8);
+
+		if (value_sz != num_cpu * elem_sz) {
+			pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
+				map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
+			return -EINVAL;
+		}
+		break;
+	}
+	default:
+		if (map->def.value_size != value_sz) {
+			pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
+				map->name, value_sz, map->def.value_size);
+			return -EINVAL;
+		}
+		break;
+	}
+	return 0;
+}
+
+int bpf_map__lookup_elem(const struct bpf_map *map,
+			 const void *key, size_t key_sz,
+			 void *value, size_t value_sz, __u64 flags)
+{
+	int err;
+
+	err = validate_map_op(map, key_sz, value_sz, true);
+	if (err)
+		return libbpf_err(err);
+
+	return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
+}
+
+int bpf_map__update_elem(const struct bpf_map *map,
+			 const void *key, size_t key_sz,
+			 const void *value, size_t value_sz, __u64 flags)
+{
+	int err;
+
+	err = validate_map_op(map, key_sz, value_sz, true);
+	if (err)
+		return libbpf_err(err);
+
+	return bpf_map_update_elem(map->fd, key, value, flags);
+}
+
+int bpf_map__delete_elem(const struct bpf_map *map,
+			 const void *key, size_t key_sz, __u64 flags)
+{
+	int err;
+
+	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+	if (err)
+		return libbpf_err(err);
+
+	return bpf_map_delete_elem_flags(map->fd, key, flags);
+}
+
+int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
+				    const void *key, size_t key_sz,
+				    void *value, size_t value_sz, __u64 flags)
+{
+	int err;
+
+	err = validate_map_op(map, key_sz, value_sz, true);
+	if (err)
+		return libbpf_err(err);
+
+	return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
+}
+
+int bpf_map__get_next_key(const struct bpf_map *map,
+			  const void *cur_key, void *next_key, size_t key_sz)
+{
+	int err;
+
+	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+	if (err)
+		return libbpf_err(err);
+
+	return bpf_map_get_next_key(map->fd, cur_key, next_key);
+}
+
 long libbpf_get_error(const void *ptr)
 {
 	if (!IS_ERR_OR_NULL(ptr))
@@ -9636,9 +10103,8 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
 		 * bpf_object__open guessed
 		 */
		if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
-			bpf_program__set_type(prog, attr->prog_type);
-			bpf_program__set_expected_attach_type(prog,
-							      attach_type);
+			prog->type = attr->prog_type;
+			prog->expected_attach_type = attach_type;
 		}
 		if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) {
 			/*
@@ -9692,14 +10158,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
 	return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
 }
 
-struct bpf_link {
-	int (*detach)(struct bpf_link *link);
-	void (*dealloc)(struct bpf_link *link);
-	char *pin_path;		/* NULL, if not pinned */
-	int fd;			/* hook FD, -1 if not applicable */
-	bool disconnected;
-};
-
 /* Replace link's underlying BPF program with the new one */
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
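The size-checked wrappers let callers pass sizeof(...) for key and value so mismatches are caught before the syscall. A hedged sketch against a hypothetical HASH map with 4-byte keys and 8-byte values:

	__u32 key = 0;
	__u64 value = 0;
	int err;

	err = bpf_map__lookup_elem(map, &key, sizeof(key),
				   &value, sizeof(value), 0);
	if (err)	/* -EINVAL on size mismatch, -ENOENT if map has no FD */
		return err;
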
@@ -10391,6 +10849,12 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf
 	char *func;
 	int n;
 
+	*link = NULL;
+
+	/* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
+	if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
+		return 0;
+
 	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
 	if (opts.retprobe)
 		func_name = prog->sec_name + sizeof("kretprobe/") - 1;
@@ -10421,6 +10885,13 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru
 	char *pattern;
 	int n;
 
+	*link = NULL;
+
+	/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
+	if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
+	    strcmp(prog->sec_name, "kretprobe.multi") == 0)
+		return 0;
+
 	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
 	if (opts.retprobe)
 		spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
@@ -10517,6 +10988,273 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
 	return pfd;
 }
 
+/* uprobes deal in relative offsets; subtract the base address associated with
+ * the mapped binary. See Documentation/trace/uprobetracer.rst for more
+ * details.
+ */
+static long elf_find_relative_offset(const char *filename, Elf *elf, long addr)
+{
+	size_t n;
+	int i;
+
+	if (elf_getphdrnum(elf, &n)) {
+		pr_warn("elf: failed to find program headers for '%s': %s\n", filename,
+			elf_errmsg(-1));
+		return -ENOENT;
+	}
+
+	for (i = 0; i < n; i++) {
+		int seg_start, seg_end, seg_offset;
+		GElf_Phdr phdr;
+
+		if (!gelf_getphdr(elf, i, &phdr)) {
+			pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename,
+				elf_errmsg(-1));
+			return -ENOENT;
+		}
+		if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
+			continue;
+
+		seg_start = phdr.p_vaddr;
+		seg_end = seg_start + phdr.p_memsz;
+		seg_offset = phdr.p_offset;
+		if (addr >= seg_start && addr < seg_end)
+			return addr - seg_start + seg_offset;
+	}
+	pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename);
+	return -ENOENT;
+}
+
+/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
+static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
+{
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		GElf_Shdr sh;
+
+		if (!gelf_getshdr(scn, &sh))
+			continue;
+		if (sh.sh_type == sh_type)
+			return scn;
+	}
+	return NULL;
+}
+
+/* Find offset of function name in object specified by path. "name" matches
+ * symbol name or name@@LIB for library functions.
+ */
+static long elf_find_func_offset(const char *binary_path, const char *name)
+{
+	int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+	bool is_shared_lib, is_name_qualified;
+	char errmsg[STRERR_BUFSIZE];
+	long ret = -ENOENT;
+	size_t name_len;
+	GElf_Ehdr ehdr;
+	Elf *elf;
+
+	fd = open(binary_path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		ret = -errno;
+		pr_warn("failed to open %s: %s\n", binary_path,
+			libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
+		return ret;
+	}
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (!elf) {
+		pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
+		close(fd);
+		return -LIBBPF_ERRNO__FORMAT;
+	}
+	if (!gelf_getehdr(elf, &ehdr)) {
+		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
+		ret = -LIBBPF_ERRNO__FORMAT;
+		goto out;
+	}
+	/* for shared lib case, we do not need to calculate relative offset */
+	is_shared_lib = ehdr.e_type == ET_DYN;
+
+	name_len = strlen(name);
+	/* Does name specify "@@LIB"? */
+	is_name_qualified = strstr(name, "@@") != NULL;
+
+	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
+	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
+	 * linked binary may not have SHT_DYNSYM, so absence of a section should not be
+	 * reported as a warning/error.
+	 */
+	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
+		size_t nr_syms, strtabidx, idx;
+		Elf_Data *symbols = NULL;
+		Elf_Scn *scn = NULL;
+		int last_bind = -1;
+		const char *sname;
+		GElf_Shdr sh;
+
+		scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
+		if (!scn) {
+			pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
+				 binary_path);
+			continue;
+		}
+		if (!gelf_getshdr(scn, &sh))
+			continue;
+		strtabidx = sh.sh_link;
+		symbols = elf_getdata(scn, 0);
+		if (!symbols) {
+			pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
+				binary_path, elf_errmsg(-1));
+			ret = -LIBBPF_ERRNO__FORMAT;
+			goto out;
+		}
+		nr_syms = symbols->d_size / sh.sh_entsize;
+
+		for (idx = 0; idx < nr_syms; idx++) {
+			int curr_bind;
+			GElf_Sym sym;
+
+			if (!gelf_getsym(symbols, idx, &sym))
+				continue;
+
+			if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+				continue;
+
+			sname = elf_strptr(elf, strtabidx, sym.st_name);
+			if (!sname)
+				continue;
+
+			curr_bind = GELF_ST_BIND(sym.st_info);
+
+			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
+			if (strncmp(sname, name, name_len) != 0)
+				continue;
+			/* ...but we don't want a search for "foo" to match "foo2" also, so any
+			 * additional characters in sname should be of the form "@@LIB".
+			 */
+			if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
+				continue;
+
+			if (ret >= 0) {
+				/* handle multiple matches */
+				if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
+					/* Only accept one non-weak bind. */
+					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
+						sname, name, binary_path);
+					ret = -LIBBPF_ERRNO__FORMAT;
+					goto out;
+				} else if (curr_bind == STB_WEAK) {
+					/* already have a non-weak bind, and
+					 * this is a weak bind, so ignore.
+					 */
+					continue;
+				}
+			}
+			ret = sym.st_value;
+			last_bind = curr_bind;
+		}
+		/* For binaries that are not shared libraries, we need relative offset */
+		if (ret > 0 && !is_shared_lib)
+			ret = elf_find_relative_offset(binary_path, elf, ret);
+		if (ret > 0)
+			break;
+	}
+
+	if (ret > 0) {
+		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
+			 ret);
+	} else {
+		if (ret == 0) {
+			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
+				is_shared_lib ? "should not be 0 in a shared library" :
+						"try using shared library path instead");
+			ret = -ENOENT;
+		} else {
+			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
+		}
+	}
+out:
+	elf_end(elf);
+	close(fd);
+	return ret;
+}
+
+static const char *arch_specific_lib_paths(void)
+{
+	/*
+	 * Based on https://packages.debian.org/sid/libc6.
+	 *
+	 * Assume that the traced program is built for the same architecture
+	 * as libbpf, which should cover the vast majority of cases.
+	 */
+#if defined(__x86_64__)
+	return "/lib/x86_64-linux-gnu";
+#elif defined(__i386__)
+	return "/lib/i386-linux-gnu";
+#elif defined(__s390x__)
+	return "/lib/s390x-linux-gnu";
+#elif defined(__s390__)
+	return "/lib/s390-linux-gnu";
+#elif defined(__arm__) && defined(__SOFTFP__)
+	return "/lib/arm-linux-gnueabi";
+#elif defined(__arm__) && !defined(__SOFTFP__)
+	return "/lib/arm-linux-gnueabihf";
+#elif defined(__aarch64__)
+	return "/lib/aarch64-linux-gnu";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
+	return "/lib/mips64el-linux-gnuabi64";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
+	return "/lib/mipsel-linux-gnu";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return "/lib/powerpc64le-linux-gnu";
+#elif defined(__sparc__) && defined(__arch64__)
+	return "/lib/sparc64-linux-gnu";
+#elif defined(__riscv) && __riscv_xlen == 64
+	return "/lib/riscv64-linux-gnu";
+#else
+	return NULL;
+#endif
+}
+
+/* Get full path to program/shared library. */
+static int resolve_full_path(const char *file, char *result, size_t result_sz)
+{
+	const char *search_paths[3] = {};
+	int i;
+
+	if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
+		search_paths[0] = getenv("LD_LIBRARY_PATH");
+		search_paths[1] = "/usr/lib64:/usr/lib";
+		search_paths[2] = arch_specific_lib_paths();
+	} else {
+		search_paths[0] = getenv("PATH");
+		search_paths[1] = "/usr/bin:/usr/sbin";
+	}
+
+	for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
+		const char *s;
+
+		if (!search_paths[i])
+			continue;
+		for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
+			char *next_path;
+			int seg_len;
+
+			if (s[0] == ':')
+				s++;
+			next_path = strchr(s, ':');
+			seg_len = next_path ? next_path - s : strlen(s);
+			if (!seg_len)
+				continue;
+			snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
+			/* ensure it is an executable file/link */
+			if (access(result, R_OK | X_OK) < 0)
+				continue;
+			pr_debug("resolved '%s' to '%s'\n", file, result);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
 LIBBPF_API struct bpf_link *
 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 				const char *binary_path, size_t func_offset,
@@ -10524,10 +11262,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 {
 	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 	char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
+	char full_binary_path[PATH_MAX];
 	struct bpf_link *link;
 	size_t ref_ctr_off;
 	int pfd, err;
 	bool retprobe, legacy;
+	const char *func_name;
 
 	if (!OPTS_VALID(opts, bpf_uprobe_opts))
 		return libbpf_err_ptr(-EINVAL);
@@ -10536,12 +11276,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
 	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
 
+	if (binary_path && !strchr(binary_path, '/')) {
+		err = resolve_full_path(binary_path, full_binary_path,
+					sizeof(full_binary_path));
+		if (err) {
+			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+				prog->name, binary_path, err);
+			return libbpf_err_ptr(err);
+		}
+		binary_path = full_binary_path;
+	}
+	func_name = OPTS_GET(opts, func_name, NULL);
+	if (func_name) {
+		long sym_off;
+
+		if (!binary_path) {
+			pr_warn("prog '%s': name-based attach requires binary_path\n",
+				prog->name);
+			return libbpf_err_ptr(-EINVAL);
+		}
+		sym_off = elf_find_func_offset(binary_path, func_name);
+		if (sym_off < 0)
+			return libbpf_err_ptr(sym_off);
+		func_offset += sym_off;
+	}
+
 	legacy = determine_uprobe_perf_type() < 0;
 	if (!legacy) {
 		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
 					    func_offset, pid, ref_ctr_off);
 	} else {
-		char probe_name[512];
+		char probe_name[PATH_MAX + 64];
 
 		if (ref_ctr_off)
 			return libbpf_err_ptr(-EINVAL);
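With resolve_full_path() and elf_find_func_offset() in place, a uprobe can be attached by symbol name alone; a hedged userspace sketch (library and symbol are examples):

	LIBBPF_OPTS(bpf_uprobe_opts, uopts,
		.func_name = "malloc",
	);
	struct bpf_link *link;
	int err;

	/* "libc.so.6" contains no '/', so it is resolved via
	 * LD_LIBRARY_PATH and the standard library directories
	 */
	link = bpf_program__attach_uprobe_opts(prog, -1 /* any pid */,
					       "libc.so.6", 0, &uopts);
	err = libbpf_get_error(link);
	if (err)
		return err;
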
@@ -10589,6 +11354,60 @@ err_out:
 
 }
 
+/* Format of u[ret]probe section definition supporting auto-attach:
+ * u[ret]probe/binary:function[+offset]
+ *
+ * binary can be an absolute/relative path or a filename; the latter is resolved to a
+ * full binary path via bpf_program__attach_uprobe_opts.
+ *
+ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
+ * specified (and auto-attach is not possible) or the above format is specified for
+ * auto-attach.
+ */
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
+	int n, ret = -EINVAL;
+	long offset = 0;
+
+	*link = NULL;
+
+	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
+		   &probe_type, &binary_path, &func_name, &offset);
+	switch (n) {
+	case 1:
+		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+		ret = 0;
+		break;
+	case 2:
+		pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
+			prog->name, prog->sec_name);
+		break;
+	case 3:
+	case 4:
+		opts.retprobe = strcmp(probe_type, "uretprobe") == 0;
+		if (opts.retprobe && offset != 0) {
+			pr_warn("prog '%s': uretprobes do not support offset specification\n",
+				prog->name);
+			break;
+		}
+		opts.func_name = func_name;
+		*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+		ret = libbpf_get_error(*link);
+		break;
+	default:
+		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
+			prog->sec_name);
+		break;
+	}
+	free(probe_type);
+	free(binary_path);
+	free(func_name);
+
+	return ret;
+}
+
 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 					    bool retprobe, pid_t pid,
 					    const char *binary_path,
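The declarative equivalent on the BPF side, matching the u[ret]probe/binary:function[+offset] format parsed above (binary and function are examples):

	SEC("uprobe/libc.so.6:malloc")
	int BPF_KPROBE(trace_malloc, size_t size)
	{
		return 0;
	}

	SEC("uretprobe/libc.so.6:malloc")
	int BPF_KRETPROBE(trace_malloc_ret, void *ret)
	{
		return 0;
	}
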
@@ -10599,6 +11418,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
 }
 
+struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
+					  pid_t pid, const char *binary_path,
+					  const char *usdt_provider, const char *usdt_name,
+					  const struct bpf_usdt_opts *opts)
+{
+	char resolved_path[512];
+	struct bpf_object *obj = prog->obj;
+	struct bpf_link *link;
+	__u64 usdt_cookie;
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_uprobe_opts))
+		return libbpf_err_ptr(-EINVAL);
+
+	if (bpf_program__fd(prog) < 0) {
+		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+			prog->name);
+		return libbpf_err_ptr(-EINVAL);
+	}
+
+	if (!strchr(binary_path, '/')) {
+		err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
+		if (err) {
+			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+				prog->name, binary_path, err);
+			return libbpf_err_ptr(err);
+		}
+		binary_path = resolved_path;
+	}
+
+	/* USDT manager is instantiated lazily on first USDT attach. It will
+	 * be destroyed together with BPF object in bpf_object__close().
+	 */
+	if (IS_ERR(obj->usdt_man))
+		return libbpf_ptr(obj->usdt_man);
+	if (!obj->usdt_man) {
+		obj->usdt_man = usdt_manager_new(obj);
+		if (IS_ERR(obj->usdt_man))
+			return libbpf_ptr(obj->usdt_man);
+	}
+
+	usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
+	link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
+					usdt_provider, usdt_name, usdt_cookie);
+	err = libbpf_get_error(link);
+	if (err)
+		return libbpf_err_ptr(err);
+	return link;
+}
+
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+	char *path = NULL, *provider = NULL, *name = NULL;
+	const char *sec_name;
+	int n, err;
+
+	sec_name = bpf_program__section_name(prog);
+	if (strcmp(sec_name, "usdt") == 0) {
+		/* no auto-attach for just SEC("usdt") */
+		*link = NULL;
+		return 0;
+	}
+
+	n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
+	if (n != 3) {
+		pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
			sec_name);
+		err = -EINVAL;
+	} else {
+		*link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
+						 provider, name, NULL);
+		err = libbpf_get_error(*link);
+	}
+	free(path);
+	free(provider);
+	free(name);
+	return err;
+}
+
 static int determine_tracepoint_id(const char *tp_category,
 				   const char *tp_name)
 {
@@ -10694,6 +11592,12 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin
 {
 	char *sec_name, *tp_cat, *tp_name;
 
+	*link = NULL;
+
+	/* no auto-attach for SEC("tp") or SEC("tracepoint") */
+	if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
+		return 0;
+
 	sec_name = strdup(prog->sec_name);
 	if (!sec_name)
 		return -ENOMEM;
@@ -10749,20 +11653,34 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr
 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
 {
 	static const char *const prefixes[] = {
-		"raw_tp/",
-		"raw_tracepoint/",
-		"raw_tp.w/",
-		"raw_tracepoint.w/",
+		"raw_tp",
+		"raw_tracepoint",
+		"raw_tp.w",
+		"raw_tracepoint.w",
 	};
 	size_t i;
 	const char *tp_name = NULL;
 
+	*link = NULL;
+
 	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
-		if (str_has_pfx(prog->sec_name, prefixes[i])) {
-			tp_name = prog->sec_name + strlen(prefixes[i]);
-			break;
-		}
+		size_t pfx_len;
+
+		if (!str_has_pfx(prog->sec_name, prefixes[i]))
+			continue;
+
+		pfx_len = strlen(prefixes[i]);
+		/* no auto-attach case of, e.g., SEC("raw_tp") */
+		if (prog->sec_name[pfx_len] == '\0')
+			return 0;
+
+		if (prog->sec_name[pfx_len] != '/')
+			continue;
+
+		tp_name = prog->sec_name + pfx_len + 1;
+		break;
 	}
+
 	if (!tp_name) {
 		pr_warn("prog '%s': invalid section name '%s'\n",
 			prog->name, prog->sec_name);
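A hedged usage sketch for the new USDT attach API (binary, provider, and probe names are hypothetical):

	struct bpf_link *link;
	int err;

	link = bpf_program__attach_usdt(prog, -1 /* any process */,
					"/usr/sbin/mysqld", "mysql",
					"query__start", NULL /* opts */);
	err = libbpf_get_error(link);
	if (err)
		return err;

The same attachment can be requested declaratively with SEC("usdt/<path>:<provider>:<name>"), handled by attach_usdt() above.
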
@@ -10774,12 +11692,17 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf
 }
 
 /* Common logic for all BPF program types that attach to a btf_id */
-static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog)
+static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
+						   const struct bpf_trace_opts *opts)
 {
+	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int prog_fd, pfd;
 
+	if (!OPTS_VALID(opts, bpf_trace_opts))
+		return libbpf_err_ptr(-EINVAL);
+
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
@@ -10791,7 +11714,9 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
 		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
-	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
+	link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
+	pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
 	if (pfd < 0) {
 		pfd = -errno;
 		free(link);
@@ -10800,17 +11725,23 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
 		return libbpf_err_ptr(pfd);
 	}
 	link->fd = pfd;
-	return (struct bpf_link *)link;
+	return link;
 }
 
 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
 {
-	return bpf_program__attach_btf_id(prog);
+	return bpf_program__attach_btf_id(prog, NULL);
+}
+
+struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
+						const struct bpf_trace_opts *opts)
+{
+	return bpf_program__attach_btf_id(prog, opts);
 }
 
 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
 {
-	return bpf_program__attach_btf_id(prog);
+	return bpf_program__attach_btf_id(prog, NULL);
 }
 
 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
@@ -12211,7 +13142,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		struct bpf_program *prog = *s->progs[i].prog;
 		struct bpf_link **link = s->progs[i].link;
 
-		if (!prog->load)
+		if (!prog->autoload)
 			continue;
 
 		/* auto-attaching not supported for this program */
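bpf_program__attach_trace_opts() ties into the FEAT_BPF_COOKIE probe earlier in this diff: the cookie travels through bpf_link_create() and is read back with bpf_get_attach_cookie(). A hedged two-sided sketch (cookie value and names illustrative):

	/* userspace */
	LIBBPF_OPTS(bpf_trace_opts, topts, .cookie = 0x1234);
	struct bpf_link *link = bpf_program__attach_trace_opts(prog, &topts);

	/* BPF side */
	SEC("fentry/do_unlinkat")
	int BPF_PROG(on_unlink)
	{
		__u64 cookie = bpf_get_attach_cookie(ctx);

		bpf_printk("attach cookie: %llu", cookie);
		return 0;
	}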