diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-23 16:49:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-23 16:49:12 -0800 |
commit | 00198dab3b825ab264424a052beea5acb859754f (patch) | |
tree | 1afa5fc96a0447bc8049a9992e5d4d047f5f0b38 | |
parent | 9004fda59577d439564d44d6d1db52d262fe3f99 (diff) | |
parent | 3705b97505bcbf6440f38119c0e7d6058f585b54 (diff) |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
"On the kernel side there's two x86 PMU driver fixes and a uprobes fix,
plus on the tooling side there's a number of fixes and some late
updates"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits)
perf sched timehist: Fix invalid period calculation
perf sched timehist: Remove hardcoded 'comm_width' check at print_summary
perf sched timehist: Enlarge default 'comm_width'
perf sched timehist: Honour 'comm_width' when aligning the headers
perf/x86: Fix overlap counter scheduling bug
perf/x86/pebs: Fix handling of PEBS buffer overflows
samples/bpf: Move open_raw_sock to separate header
samples/bpf: Remove perf_event_open() declaration
samples/bpf: Be consistent with bpf_load_program bpf_insn parameter
tools lib bpf: Add bpf_prog_{attach,detach}
samples/bpf: Switch over to libbpf
perf diff: Do not overwrite valid build id
perf annotate: Don't throw error for zero length symbols
perf bench futex: Fix lock-pi help string
perf trace: Check if MAP_32BIT is defined (again)
samples/bpf: Make perf_event_read() static
uprobes: Fix uprobes on MIPS, allow for a cache flush after ixol breakpoint creation
samples/bpf: Make samples more libbpf-centric
tools lib bpf: Add flags to bpf_create_map()
tools lib bpf: use __u32 from linux/types.h
...
63 files changed, 1104 insertions, 750 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cb8522290e6a..86138267b68a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2110,6 +2110,27 @@ again: GLOBAL_STATUS_LBRS_FROZEN); if (!status) goto done; + /* + * In case multiple PEBS events are sampled at the same time, + * it is possible to have GLOBAL_STATUS bit 62 set indicating + * PEBS buffer overflow and also seeing at most 3 PEBS counters + * having their bits set in the status register. This is a sign + * that there was at least one PEBS record pending at the time + * of the PMU interrupt. PEBS counters must only be processed + * via the drain_pebs() calls and not via the regular sample + * processing loop coming after that the function, otherwise + * phony regular samples may be generated in the sampling buffer + * not marked with the EXACT tag. Another possibility is to have + * one PEBS event and at least one non-PEBS event whic hoverflows + * while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will + * not be set, yet the overflow status bit for the PEBS counter will + * be on Skylake. + * + * To avoid this problem, we systematically ignore the PEBS-enabled + * counters from the GLOBAL_STATUS mask and we always process PEBS + * events via drain_pebs(). + */ + status &= ~cpuc->pebs_enabled; /* * PEBS overflow sets bit 62 in the global status register @@ -2117,15 +2138,6 @@ again: if (__test_and_clear_bit(62, (unsigned long *)&status)) { handled++; x86_pmu.drain_pebs(regs); - /* - * There are cases where, even though, the PEBS ovfl bit is set - * in GLOBAL_OVF_STATUS, the PEBS events may also have their - * overflow bits set for their counters. We must clear them - * here because they have been processed as exact samples in - * the drain_pebs() routine. They must not be processed again - * in the for_each_bit_set() loop for regular samples below. - */ - status &= ~cpuc->pebs_enabled; status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 272427700d48..e6832be714bc 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -669,7 +669,7 @@ static struct event_constraint snbep_uncore_cbox_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), - EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), + UNCORE_EVENT_CONSTRAINT(0x1f, 0xe), UNCORE_EVENT_CONSTRAINT(0x21, 0x3), UNCORE_EVENT_CONSTRAINT(0x23, 0x3), UNCORE_EVENT_CONSTRAINT(0x31, 0x3), diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 215871bda3a2..d416f3baf392 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1194,7 +1194,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); atomic_set(&area->slot_count, 1); - copy_to_page(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); + arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); if (!xol_add_vma(mm, area)) return area; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 13315ff1193c..09e9d535bd74 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -35,40 +35,43 @@ hostprogs-y += tc_l2_redirect hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel -test_lru_dist-objs := test_lru_dist.o libbpf.o -sock_example-objs := sock_example.o libbpf.o -fds_example-objs := bpf_load.o libbpf.o fds_example.o -sockex1-objs := bpf_load.o libbpf.o sockex1_user.o -sockex2-objs := bpf_load.o libbpf.o sockex2_user.o -sockex3-objs := bpf_load.o libbpf.o sockex3_user.o -tracex1-objs := bpf_load.o libbpf.o tracex1_user.o -tracex2-objs := bpf_load.o libbpf.o tracex2_user.o -tracex3-objs := bpf_load.o libbpf.o tracex3_user.o -tracex4-objs := bpf_load.o libbpf.o tracex4_user.o -tracex5-objs := bpf_load.o libbpf.o tracex5_user.o -tracex6-objs := bpf_load.o libbpf.o tracex6_user.o -test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o -trace_output-objs := bpf_load.o libbpf.o trace_output_user.o -lathist-objs := bpf_load.o libbpf.o lathist_user.o -offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o -spintest-objs := bpf_load.o libbpf.o spintest_user.o -map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o -test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o -test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o -test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o -test_cgrp2_attach2-objs := libbpf.o test_cgrp2_attach2.o cgroup_helpers.o -test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o -test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o -xdp1-objs := bpf_load.o libbpf.o xdp1_user.o +# Libbpf dependencies +LIBBPF := ../../tools/lib/bpf/bpf.o + +test_lru_dist-objs := test_lru_dist.o $(LIBBPF) +sock_example-objs := sock_example.o $(LIBBPF) +fds_example-objs := bpf_load.o $(LIBBPF) fds_example.o +sockex1-objs := bpf_load.o $(LIBBPF) sockex1_user.o +sockex2-objs := bpf_load.o $(LIBBPF) sockex2_user.o +sockex3-objs := bpf_load.o $(LIBBPF) sockex3_user.o +tracex1-objs := bpf_load.o $(LIBBPF) tracex1_user.o +tracex2-objs := bpf_load.o $(LIBBPF) tracex2_user.o +tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o +tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o +tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o +tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o +trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o +lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o +offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o +spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o +map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o +test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o +test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o +test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o +test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o +test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o +test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o +xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally -xdp2-objs := bpf_load.o libbpf.o xdp1_user.o -test_current_task_under_cgroup-objs := bpf_load.o libbpf.o cgroup_helpers.o \ +xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o +test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ test_current_task_under_cgroup_user.o -trace_event-objs := bpf_load.o libbpf.o trace_event_user.o -sampleip-objs := bpf_load.o libbpf.o sampleip_user.o -tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o -lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o +trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o +sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o +tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o +lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o +xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -104,7 +107,10 @@ always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o HOSTCFLAGS += -I$(objtree)/usr/include +HOSTCFLAGS += -I$(srctree)/tools/lib/ HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ +HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include +HOSTCFLAGS += -I$(srctree)/tools/perf HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_fds_example += -lelf diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index a43eae3f0551..79f9a58f1872 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -1,8 +1,8 @@ eBPF sample programs ==================== -This directory contains a mini eBPF library, test stubs, verifier -test-suite and examples for using eBPF. +This directory contains a test stubs, verifier test-suite and examples +for using eBPF. The examples use libbpf from tools/lib/bpf. Build dependencies ================== diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index e30b6de94f2e..396e204888b3 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -22,25 +22,34 @@ #include <poll.h> #include <ctype.h> #include "libbpf.h" -#include "bpf_helpers.h" #include "bpf_load.h" +#include "perf-sys.h" #define DEBUGFS "/sys/kernel/debug/tracing/" static char license[128]; static int kern_version; static bool processed_sec[128]; +char bpf_log_buf[BPF_LOG_BUF_SIZE]; int map_fd[MAX_MAPS]; int prog_fd[MAX_PROGS]; int event_fd[MAX_PROGS]; int prog_cnt; int prog_array_fd = -1; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + static int populate_prog_array(const char *event, int prog_fd) { int ind = atoi(event), err; - err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); + err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); if (err < 0) { printf("failed to store prog_fd in prog_array\n"); return -1; @@ -58,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -87,9 +97,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } - fd = bpf_prog_load(prog_type, prog, size, license, kern_version); + fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, + bpf_log_buf, BPF_LOG_BUF_SIZE); if (fd < 0) { - printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); return -1; } @@ -169,7 +180,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) id = atoi(buf); attr.config = id; - efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); if (efd < 0) { printf("event %d fd %d err %s\n", id, efd, strerror(errno)); return -1; diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index fb46a421ab41..c827827299b3 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -1,12 +1,15 @@ #ifndef __BPF_LOAD_H #define __BPF_LOAD_H +#include "libbpf.h" + #define MAX_MAPS 32 #define MAX_PROGS 32 extern int map_fd[MAX_MAPS]; extern int prog_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS]; +extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; extern int prog_cnt; /* parses elf file compiled by llvm .c->.o diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 625e797be6ef..e29bd52ff9e8 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -14,6 +14,7 @@ #include "bpf_load.h" #include "libbpf.h" +#include "sock_example.h" #define BPF_F_PIN (1 << 0) #define BPF_F_GET (1 << 1) @@ -49,17 +50,19 @@ static int bpf_map_create(void) static int bpf_prog_create(const char *object) { - static const struct bpf_insn insns[] = { + static struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); if (object) { assert(!load_bpf_file((char *)object)); return prog_fd[0]; } else { - return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, - insns, sizeof(insns), "GPL", 0); + return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, + insns, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); } } @@ -83,12 +86,12 @@ static int bpf_do_map(const char *file, uint32_t flags, uint32_t key, } if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) { - ret = bpf_update_elem(fd, &key, &value, 0); + ret = bpf_map_update_elem(fd, &key, &value, 0); printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value, ret, strerror(errno)); assert(ret == 0); } else if (flags & BPF_F_KEY) { - ret = bpf_lookup_elem(fd, &key, &value); + ret = bpf_map_lookup_elem(fd, &key, &value); printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value, ret, strerror(errno)); assert(ret == 0); diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c index 65da8c1576de..6477bad5b4e2 100644 --- a/samples/bpf/lathist_user.c +++ b/samples/bpf/lathist_user.c @@ -73,7 +73,7 @@ static void get_data(int fd) for (c = 0; c < MAX_CPU; c++) { for (i = 0; i < MAX_ENTRIES; i++) { key = c * MAX_ENTRIES + i; - bpf_lookup_elem(fd, &key, &value); + bpf_map_lookup_elem(fd, &key, &value); cpu_hist[c].data[i] = value; if (value > cpu_hist[c].max) diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c deleted file mode 100644 index 9ce707bf02a7..000000000000 --- a/samples/bpf/libbpf.c +++ /dev/null @@ -1,176 +0,0 @@ -/* eBPF mini library */ -#include <stdlib.h> -#include <stdio.h> -#include <linux/unistd.h> -#include <unistd.h> -#include <string.h> -#include <linux/netlink.h> -#include <linux/bpf.h> -#include <errno.h> -#include <net/ethernet.h> -#include <net/if.h> -#include <linux/if_packet.h> -#include <arpa/inet.h> -#include "libbpf.h" - -static __u64 ptr_to_u64(void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, int map_flags) -{ - union bpf_attr attr = { - .map_type = map_type, - .key_size = key_size, - .value_size = value_size, - .max_entries = max_entries, - .map_flags = map_flags, - }; - - return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); -} - -int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - .value = ptr_to_u64(value), - .flags = flags, - }; - - return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); -} - -int bpf_lookup_elem(int fd, void *key, void *value) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - .value = ptr_to_u64(value), - }; - - return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); -} - -int bpf_delete_elem(int fd, void *key) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - }; - - return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); -} - -int bpf_get_next_key(int fd, void *key, void *next_key) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = ptr_to_u64(key), - .next_key = ptr_to_u64(next_key), - }; - - return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); -} - -#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u)) - -char bpf_log_buf[LOG_BUF_SIZE]; - -int bpf_prog_load(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int prog_len, - const char *license, int kern_version) -{ - union bpf_attr attr = { - .prog_type = prog_type, - .insns = ptr_to_u64((void *) insns), - .insn_cnt = prog_len / sizeof(struct bpf_insn), - .license = ptr_to_u64((void *) license), - .log_buf = ptr_to_u64(bpf_log_buf), - .log_size = LOG_BUF_SIZE, - .log_level = 1, - }; - - /* assign one field outside of struct init to make sure any - * padding is zero initialized - */ - attr.kern_version = kern_version; - - bpf_log_buf[0] = 0; - - return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); -} - -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) -{ - union bpf_attr attr = { - .target_fd = target_fd, - .attach_bpf_fd = prog_fd, - .attach_type = type, - }; - - return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr)); -} - -int bpf_prog_detach(int target_fd, enum bpf_attach_type type) -{ - union bpf_attr attr = { - .target_fd = target_fd, - .attach_type = type, - }; - - return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr)); -} - -int bpf_obj_pin(int fd, const char *pathname) -{ - union bpf_attr attr = { - .pathname = ptr_to_u64((void *)pathname), - .bpf_fd = fd, - }; - - return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr)); -} - -int bpf_obj_get(const char *pathname) -{ - union bpf_attr attr = { - .pathname = ptr_to_u64((void *)pathname), - }; - - return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr)); -} - -int open_raw_sock(const char *name) -{ - struct sockaddr_ll sll; - int sock; - - sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); - if (sock < 0) { - printf("cannot create raw socket\n"); - return -1; - } - - memset(&sll, 0, sizeof(sll)); - sll.sll_family = AF_PACKET; - sll.sll_ifindex = if_nametoindex(name); - sll.sll_protocol = htons(ETH_P_ALL); - if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { - printf("bind to %s: %s\n", name, strerror(errno)); - close(sock); - return -1; - } - - return sock; -} - -int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, - int group_fd, unsigned long flags) -{ - return syscall(__NR_perf_event_open, attr, pid, cpu, - group_fd, flags); -} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 94a901d86fc2..3705fba453a0 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -2,27 +2,9 @@ #ifndef __LIBBPF_H #define __LIBBPF_H -struct bpf_insn; - -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, int map_flags); -int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags); -int bpf_lookup_elem(int fd, void *key, void *value); -int bpf_delete_elem(int fd, void *key); -int bpf_get_next_key(int fd, void *key, void *next_key); - -int bpf_prog_load(enum bpf_prog_type prog_type, - const struct bpf_insn *insns, int insn_len, - const char *license, int kern_version); - -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); -int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +#include <bpf/bpf.h> -int bpf_obj_pin(int fd, const char *pathname); -int bpf_obj_get(const char *pathname); - -#define LOG_BUF_SIZE (256 * 1024) -extern char bpf_log_buf[LOG_BUF_SIZE]; +struct bpf_insn; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -203,10 +185,4 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = 0 }) -/* create RAW socket and bind to interface 'name' */ -int open_raw_sock(const char *name); - -struct perf_event_attr; -int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, - int group_fd, unsigned long flags); #endif diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c index 05d783fc5daf..ec8f3bbcbef3 100644 --- a/samples/bpf/lwt_len_hist_user.c +++ b/samples/bpf/lwt_len_hist_user.c @@ -14,6 +14,8 @@ #define MAX_INDEX 64 #define MAX_STARS 38 +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static void stars(char *str, long val, long max, int width) { int i; @@ -41,13 +43,13 @@ int main(int argc, char **argv) return -1; } - while (bpf_get_next_key(map_fd, &key, &next_key) == 0) { + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) { if (next_key >= MAX_INDEX) { fprintf(stderr, "Key %lu out of bounds\n", next_key); continue; } - bpf_lookup_elem(map_fd, &next_key, values); + bpf_map_lookup_elem(map_fd, &next_key, values); sum = 0; for (i = 0; i < nr_cpus; i++) diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 6f002a9c24fa..9cce2a66bd66 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -49,14 +49,14 @@ static void print_stack(struct key_t *key, __u64 count) int i; printf("%s;", key->target); - if (bpf_lookup_elem(map_fd[3], &key->tret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_ksym(ip[i]); } printf("-;"); - if (bpf_lookup_elem(map_fd[3], &key->wret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) { printf("---;"); } else { for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) @@ -77,8 +77,8 @@ static void print_stacks(int fd) struct key_t key = {}, next_key; __u64 value; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { - bpf_lookup_elem(fd, &next_key, &value); + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &value); print_stack(&next_key, value); key = next_key; } diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 260a6bdd6413..be59d7dcbdde 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -21,6 +21,7 @@ #include <sys/ioctl.h> #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define DEFAULT_FREQ 99 #define DEFAULT_SECS 5 @@ -49,7 +50,7 @@ static int sampling_start(int *pmu_fd, int freq) }; for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i, + pmu_fd[i] = sys_perf_event_open(&pe_sample_attr, -1 /* pid */, i, -1 /* group_fd */, 0 /* flags */); if (pmu_fd[i] < 0) { fprintf(stderr, "ERROR: Initializing perf sampling\n"); @@ -95,8 +96,8 @@ static void print_ip_map(int fd) /* fetch IPs and counts */ key = 0, i = 0; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { - bpf_lookup_elem(fd, &next_key, &value); + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &value); counts[i].ip = next_key; counts[i++].count = value; key = next_key; diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 28b60baa9fa8..6fc6e193ef1b 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -27,6 +27,9 @@ #include <linux/ip.h> #include <stddef.h> #include "libbpf.h" +#include "sock_example.h" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; static int test_sock(void) { @@ -54,9 +57,10 @@ static int test_sock(void) BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), - "GPL", 0); + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt, + "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); if (prog_fd < 0) { printf("failed to load prog '%s'\n", strerror(errno)); goto cleanup; @@ -72,13 +76,13 @@ static int test_sock(void) for (i = 0; i < 10; i++) { key = IPPROTO_TCP; - assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0); key = IPPROTO_UDP; - assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0); key = IPPROTO_ICMP; - assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0); printf("TCP %lld UDP %lld ICMP %lld packets\n", tcp_cnt, udp_cnt, icmp_cnt); diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h new file mode 100644 index 000000000000..09f7fe7e5fd7 --- /dev/null +++ b/samples/bpf/sock_example.h @@ -0,0 +1,35 @@ +#include <stdlib.h> +#include <stdio.h> +#include <linux/unistd.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <linux/if_packet.h> +#include <arpa/inet.h> +#include "libbpf.h" + +static inline int open_raw_sock(const char *name) +{ + struct sockaddr_ll sll; + int sock; + + sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); + if (sock < 0) { + printf("cannot create raw socket\n"); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = if_nametoindex(name); + sll.sll_protocol = htons(ETH_P_ALL); + if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + printf("bind to %s: %s\n", name, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 678ce4693551..6cd2feb3e9b3 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include "libbpf.h" #include "bpf_load.h" +#include "sock_example.h" #include <unistd.h> #include <arpa/inet.h> @@ -32,13 +33,13 @@ int main(int ac, char **argv) int key; key = IPPROTO_TCP; - assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); key = IPPROTO_UDP; - assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); key = IPPROTO_ICMP; - assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); printf("TCP %lld UDP %lld ICMP %lld bytes\n", tcp_cnt, udp_cnt, icmp_cnt); diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 8a4085c2d117..0e0207c90841 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include "libbpf.h" #include "bpf_load.h" +#include "sock_example.h" #include <unistd.h> #include <arpa/inet.h> #include <sys/resource.h> @@ -39,8 +40,8 @@ int main(int ac, char **argv) int key = 0, next_key; struct pair value; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &value); printf("ip %s bytes %lld packets %lld\n", inet_ntoa((struct in_addr){htonl(next_key)}), value.bytes, value.packets); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 3fcfd8c4b2a3..b5524d417eb5 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include "libbpf.h" #include "bpf_load.h" +#include "sock_example.h" #include <unistd.h> #include <arpa/inet.h> #include <sys/resource.h> @@ -54,8 +55,8 @@ int main(int argc, char **argv) sleep(1); printf("IP src.port -> dst.port bytes packets\n"); - while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[2], &next_key, &value); + while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[2], &next_key, &value); printf("%s.%05d -> %s.%05d %12lld %12lld\n", inet_ntoa((struct in_addr){htonl(next_key.src)}), next_key.port16[0], diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 311ede532230..80676c25fa50 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -31,8 +31,8 @@ int main(int ac, char **argv) for (i = 0; i < 5; i++) { key = 0; printf("kprobing funcs:"); - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &value); assert(next_key == value); sym = ksym_search(value); printf(" %s", sym->name); @@ -41,8 +41,8 @@ int main(int ac, char **argv) if (key) printf("\n"); key = 0; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) - bpf_delete_elem(map_fd[0], &next_key); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) + bpf_map_delete_elem(map_fd[0], &next_key); sleep(1); } diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c index 4013c5337b91..28995a776560 100644 --- a/samples/bpf/tc_l2_redirect_user.c +++ b/samples/bpf/tc_l2_redirect_user.c @@ -60,9 +60,9 @@ int main(int argc, char **argv) } /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ - ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); + ret = bpf_map_update_elem(array_fd, &array_key, &ifindex, 0); if (ret) { - perror("bpf_update_elem"); + perror("bpf_map_update_elem"); goto out; } diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c index 70e86f7be69d..8a1b8b5d8def 100644 --- a/samples/bpf/test_cgrp2_array_pin.c +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -85,9 +85,9 @@ int main(int argc, char **argv) } } - ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); + ret = bpf_map_update_elem(array_fd, &array_key, &cg2_fd, 0); if (ret) { - perror("bpf_update_elem"); + perror("bpf_map_update_elem"); goto out; } diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index a19484c45b79..504058631ffc 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -36,6 +36,8 @@ enum { MAP_KEY_BYTES, }; +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int prog_load(int map_fd, int verdict) { struct bpf_insn prog[] = { @@ -66,9 +68,11 @@ static int prog_load(int map_fd, int verdict) BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, - prog, sizeof(prog), "GPL", 0); + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); } static int usage(const char *argv0) @@ -108,10 +112,10 @@ static int attach_filter(int cg_fd, int type, int verdict) } while (1) { key = MAP_KEY_PACKETS; - assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &pkt_cnt) == 0); key = MAP_KEY_BYTES; - assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &byte_cnt) == 0); printf("cgroup received %lld packets, %lld bytes\n", pkt_cnt, byte_cnt); diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index ddfac42ed4df..6e69be37f87f 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -32,6 +32,8 @@ #define BAR "/foo/bar/" #define PING_CMD "ping -c1 -w1 127.0.0.1" +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int prog_load(int verdict) { int ret; @@ -39,9 +41,11 @@ static int prog_load(int verdict) BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, - prog, sizeof(prog), "GPL", 0); + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); if (ret < 0) { log_err("Loading program"); diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index d467b3c1c55c..0791b949cbe4 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -23,6 +23,8 @@ #include "libbpf.h" +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static int prog_load(int idx) { struct bpf_insn prog[] = { @@ -33,9 +35,10 @@ static int prog_load(int idx) BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ BPF_EXIT_INSN(), }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), - "GPL", 0); + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, + "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } static int usage(const char *argv0) diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index 95aaaa846130..65b5fb51c1db 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -36,7 +36,7 @@ int main(int argc, char **argv) if (!cg2) goto err; - if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { + if (bpf_map_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { log_err("Adding target cgroup to map"); goto err; } @@ -50,7 +50,7 @@ int main(int argc, char **argv) */ sync(); - bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid); if (local_pid != remote_pid) { fprintf(stderr, @@ -64,10 +64,10 @@ int main(int argc, char **argv) goto err; remote_pid = 0; - bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); + bpf_map_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); sync(); - bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + bpf_map_lookup_elem(map_fd[1], &idx, &remote_pid); if (local_pid == remote_pid) { fprintf(stderr, "BPF cgroup negative test did not work\n"); diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 316230a0ed23..d96dc88d3b04 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -134,7 +134,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, int seen = 0; lru->total++; - if (!bpf_lookup_elem(lru->map_fd, &key, &node)) { + if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) { if (node) { list_move(&node->list, &lru->list); return 1; @@ -151,7 +151,7 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, node = list_last_entry(&lru->list, struct pfect_lru_node, list); - bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); + bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); } node->key = key; @@ -159,10 +159,10 @@ static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, lru->nr_misses++; if (seen) { - assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); + assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); } else { lru->nr_unique++; - assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); + assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); } return seen; @@ -285,11 +285,11 @@ static void do_test_lru_dist(int task, void *data) pfect_lru_lookup_or_insert(&pfect_lru, key); - if (!bpf_lookup_elem(lru_map_fd, &key, &value)) + if (!bpf_map_lookup_elem(lru_map_fd, &key, &value)) continue; - if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { - printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n", + if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { + printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n", key, errno); assert(0); } @@ -358,19 +358,19 @@ static void test_lru_loss0(int map_type, int map_flags) for (key = 1; key <= 1000; key++) { int start_key, end_key; - assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); + assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); start_key = 101; end_key = min(key, 900); while (start_key <= end_key) { - bpf_lookup_elem(map_fd, &start_key, value); + bpf_map_lookup_elem(map_fd, &start_key, value); start_key++; } } for (key = 1; key <= 1000; key++) { - if (bpf_lookup_elem(map_fd, &key, value)) { + if (bpf_map_lookup_elem(map_fd, &key, value)) { if (key <= 100) old_unused_losses++; else if (key <= 900) @@ -408,10 +408,10 @@ static void test_lru_loss1(int map_type, int map_flags) value[0] = 1234; for (key = 1; key <= 1000; key++) - assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); for (key = 1; key <= 1000; key++) { - if (bpf_lookup_elem(map_fd, &key, value)) + if (bpf_map_lookup_elem(map_fd, &key, value)) nr_losses++; } @@ -436,7 +436,7 @@ static void do_test_parallel_lru_loss(int task, void *data) next_ins_key = stable_base; value[0] = 1234; for (i = 0; i < nr_stable_elems; i++) { - assert(bpf_update_elem(map_fd, &next_ins_key, value, + assert(bpf_map_update_elem(map_fd, &next_ins_key, value, BPF_NOEXIST) == 0); next_ins_key++; } @@ -448,9 +448,9 @@ static void do_test_parallel_lru_loss(int task, void *data) if (rn % 10) { key = rn % nr_stable_elems + stable_base; - bpf_lookup_elem(map_fd, &key, value); + bpf_map_lookup_elem(map_fd, &key, value); } else { - bpf_update_elem(map_fd, &next_ins_key, value, + bpf_map_update_elem(map_fd, &next_ins_key, value, BPF_NOEXIST); next_ins_key++; } @@ -458,7 +458,7 @@ static void do_test_parallel_lru_loss(int task, void *data) key = stable_base; for (i = 0; i < nr_stable_elems; i++) { - if (bpf_lookup_elem(map_fd, &key, value)) + if (bpf_map_lookup_elem(map_fd, &key, value)) nr_losses++; key++; } diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index a44bf347bedd..b5bf178a6ecc 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -50,7 +50,7 @@ int main(int ac, char **argv) mapped_addr_in->sin_port = htons(5555); mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); - assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); assert(listen(serverfd, 5) == 0); diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 9a130d31ecf2..0c5561d193a4 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -20,6 +20,7 @@ #include <sys/resource.h> #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define SAMPLE_FREQ 50 @@ -61,14 +62,14 @@ static void print_stack(struct key_t *key, __u64 count) int i; printf("%3lld %s;", count, key->comm); - if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_ksym(ip[i]); } printf("-;"); - if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) @@ -98,10 +99,10 @@ static void print_stacks(void) int fd = map_fd[0], stack_map = map_fd[1]; sys_read_seen = sys_write_seen = false; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { - bpf_lookup_elem(fd, &next_key, &value); + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &value); print_stack(&next_key, value); - bpf_delete_elem(fd, &next_key); + bpf_map_delete_elem(fd, &next_key); key = next_key; } @@ -111,8 +112,8 @@ static void print_stacks(void) } /* clear stack map */ - while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) { - bpf_delete_elem(stack_map, &next_id); + while (bpf_map_get_next_key(stack_map, &stackid, &next_id) == 0) { + bpf_map_delete_elem(stack_map, &next_id); stackid = next_id; } } @@ -125,9 +126,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0); + pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); if (pmu_fd[i] < 0) { - printf("perf_event_open failed\n"); + printf("sys_perf_event_open failed\n"); goto all_cpu_err; } assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); @@ -146,9 +147,9 @@ static void test_perf_event_task(struct perf_event_attr *attr) int pmu_fd; /* open task bound event */ - pmu_fd = perf_event_open(attr, 0, -1, -1, 0); + pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { - printf("perf_event_open failed\n"); + printf("sys_perf_event_open failed\n"); return; } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 661a7d052f2c..f4fa6af22def 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -21,6 +21,7 @@ #include <signal.h> #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" static int pmu_fd; @@ -61,7 +62,7 @@ struct perf_event_sample { char data[]; }; -void perf_event_read(print_fn fn) +static void perf_event_read(print_fn fn) { __u64 data_tail = header->data_tail; __u64 data_head = header->data_head; @@ -159,10 +160,10 @@ static void test_bpf_perf_event(void) }; int key = 0; - pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + pmu_fd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); assert(pmu_fd >= 0); - assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); + assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0); ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); } diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 3e225e331f66..ded9804c5034 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -48,12 +48,12 @@ static void print_hist_for_pid(int fd, void *task) long max_value = 0; int i, ind; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { if (memcmp(&next_key, task, SIZE)) { key = next_key; continue; } - bpf_lookup_elem(fd, &next_key, values); + bpf_map_lookup_elem(fd, &next_key, values); value = 0; for (i = 0; i < nr_cpus; i++) value += values[i]; @@ -83,7 +83,7 @@ static void print_hist(int fd) int task_cnt = 0; int i; - while (bpf_get_next_key(fd, &key, &next_key) == 0) { + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { int found = 0; for (i = 0; i < task_cnt; i++) @@ -136,8 +136,8 @@ int main(int ac, char **argv) for (i = 0; i < 5; i++) { key = 0; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &value); printf("location 0x%lx count %ld\n", next_key, value); key = next_key; } diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index d0851cb4fa8d..8f7d199d5945 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -28,7 +28,7 @@ static void clear_stats(int fd) memset(values, 0, sizeof(values)); for (key = 0; key < SLOTS; key++) - bpf_update_elem(fd, &key, values, BPF_ANY); + bpf_map_update_elem(fd, &key, values, BPF_ANY); } const char *color[] = { @@ -89,7 +89,7 @@ static void print_hist(int fd) int i; for (key = 0; key < SLOTS; key++) { - bpf_lookup_elem(fd, &key, values); + bpf_map_lookup_elem(fd, &key, values); value = 0; for (i = 0; i < nr_cpus; i++) value += values[i]; diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index bc4a3bdea6ed..03449f773cb1 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -37,8 +37,8 @@ static void print_old_objects(int fd) key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ key = -1; - while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_lookup_elem(map_fd[0], &next_key, &v); + while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd[0], &next_key, &v); key = next_key; if (val - v.val < 1000000000ll) /* object was allocated more then 1 sec ago */ diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 8ea4976cfcf1..ca7874ed77f4 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -10,6 +10,7 @@ #include <linux/bpf.h> #include "libbpf.h" #include "bpf_load.h" +#include "perf-sys.h" #define SAMPLE_PERIOD 0x7fffffffffffffffULL @@ -30,13 +31,13 @@ static void test_bpf_perf_event(void) }; for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); + pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); if (pmu_fd[i] < 0) { printf("event syscall failed\n"); goto exit; } - bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); + bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); } diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 5f040a0d7712..d2be65d1fd86 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -43,7 +43,7 @@ static void poll_stats(int interval) for (key = 0; key < nr_keys; key++) { __u64 sum = 0; - assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[key][i]); if (sum) diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 7a71f5c74684..70e192fc61aa 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -51,7 +51,7 @@ static void poll_stats(unsigned int kill_after_s) for (proto = 0; proto < nr_protos; proto++) { __u64 sum = 0; - assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0); + assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[proto][i]); @@ -237,8 +237,8 @@ int main(int argc, char **argv) while (min_port <= max_port) { vip.dport = htons(min_port++); - if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { - perror("bpf_update_elem(&vip2tnl)"); + if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { + perror("bpf_map_update_elem(&vip2tnl)"); return 1; } } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9e5fc168c8a3..0eb0e87dbe9f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,6 +73,8 @@ enum bpf_cmd { BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, }; enum bpf_map_type { @@ -85,6 +87,8 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, }; enum bpf_prog_type { @@ -95,8 +99,23 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, }; +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -105,6 +124,13 @@ enum bpf_prog_type { #define BPF_EXIST 2 /* update existing element */ #define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -140,243 +166,327 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + }; } __attribute__((aligned(8))); +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(&map, index) + * Return: Number events read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, index, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @index: index of event in the map + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + * + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. + * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x enum bpf_func_id { - BPF_FUNC_unspec, - BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ - BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ - BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - - /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_skb_store_bytes, - - /** - * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l3_csum_replace, - - /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l4_csum_replace, - - /** - * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run - * Return: 0 on success - */ - BPF_FUNC_tail_call, - - /** - * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_clone_redirect, - - /** - * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid - */ - BPF_FUNC_get_current_pid_tgid, - - /** - * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - */ - BPF_FUNC_get_current_uid_gid, - - /** - * bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success - */ - BPF_FUNC_get_current_comm, - - /** - * bpf_get_cgroup_classid(skb) - retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - */ - BPF_FUNC_get_cgroup_classid, - BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ - BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ - - /** - * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Retrun: 0 on success - */ - BPF_FUNC_skb_get_tunnel_key, - BPF_FUNC_skb_set_tunnel_key, - BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ - /** - * bpf_redirect(ifindex, flags) - redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - */ - BPF_FUNC_redirect, - - /** - * bpf_get_route_realm(skb) - retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - */ - BPF_FUNC_get_route_realm, - - /** - * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @index: index of event in the map - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success - */ - BPF_FUNC_perf_event_output, - BPF_FUNC_skb_load_bytes, - - /** - * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - */ - BPF_FUNC_get_stackid, - - /** - * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result - */ - BPF_FUNC_csum_diff, - - /** - * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) - * retrieve or populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success for set, option size for get - */ - BPF_FUNC_skb_get_tunnel_opt, - BPF_FUNC_skb_set_tunnel_opt, - - /** - * bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is - * v4 -> v6, v6 -> v4 transitions. The helper will also - * resize the skb. eBPF program is expected to fill the - * new headers via skb_store_bytes and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_proto, - - /** - * bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_type, - - /** - * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_skb_under_cgroup, - - /** - * bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash - */ - BPF_FUNC_get_hash_recalc, - - /** - * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - */ - BPF_FUNC_get_current_task, - - /** - * bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - */ - BPF_FUNC_probe_write_user, - + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; +#undef __BPF_ENUM_FN /* All flags used by eBPF helper functions, placed here. */ @@ -450,6 +560,31 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; +}; + +#define XDP_PACKET_HEADROOM 256 + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 8143536b462a..3ddb58a36d3c 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -54,7 +54,7 @@ static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, } int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries) + int value_size, int max_entries, __u32 map_flags) { union bpf_attr attr; @@ -64,13 +64,14 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, attr.key_size = key_size; attr.value_size = value_size; attr.max_entries = max_entries; + attr.map_flags = map_flags; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, size_t insns_cnt, char *license, - u32 kern_version, char *log_buf, size_t log_buf_sz) + __u32 kern_version, char *log_buf, size_t log_buf_sz) { int fd; union bpf_attr attr; @@ -98,7 +99,7 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, } int bpf_map_update_elem(int fd, void *key, void *value, - u64 flags) + __u64 flags) { union bpf_attr attr; @@ -166,3 +167,26 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } + +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 253c3dbb06b4..a2f9853dd882 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,22 +24,25 @@ #include <linux/bpf.h> int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries); + int max_entries, __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns, size_t insns_cnt, char *license, - u32 kern_version, char *log_buf, + __u32 kern_version, char *log_buf, size_t log_buf_sz); int bpf_map_update_elem(int fd, void *key, void *value, - u64 flags); + __u64 flags); int bpf_map_lookup_elem(int fd, void *key, void *value); int bpf_map_delete_elem(int fd, void *key); int bpf_map_get_next_key(int fd, void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); + #endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e974593f3e8..84e6b35da4bd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -854,7 +854,8 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map(def->type, def->key_size, def->value_size, - def->max_entries); + def->max_entries, + 0); if (*pfd < 0) { size_t j; int err = *pfd; diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 7775b1eb2bee..76173969ab80 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-I:: +--idle-hist:: + Show idle-related events only. + --time:: Only analyze samples within given time window: <start>,<stop>. Times have the format seconds.microseconds. If start is not given (i.e., time diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e5af38eede17..8fc24824705e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -201,6 +201,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS)) $(goals) all: sub-make sub-make: fixdep + @./check-headers.sh $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals) else # force_fixdep @@ -404,99 +405,6 @@ export JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): prepare FORCE - @(test -f ../../include/uapi/linux/perf_event.h && ( \ - (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/hash.h && ( \ - (diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \ - || echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \ - (diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ - (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/required-features.h && ( \ - (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ - (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memset_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/__fls.h && ( \ - (diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls.h && ( \ - (diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls64.h && ( \ - (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/coresight-pmu.h && ( \ - (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \ - || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman-common.h && ( \ - (diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(JEVENTS_IN): FORCE diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 465012b320ee..6d9d6c40a916 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -48,7 +48,7 @@ static const struct option options[] = { }; static const char * const bench_futex_lock_pi_usage[] = { - "perf bench futex requeue <options>", + "perf bench futex lock-pi <options>", NULL }; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4b419631753d..f8ca7a4ebabc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -208,7 +208,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he, static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, struct machine *machine) { struct c2c_hists *c2c_hists = &c2c.hists; @@ -379,7 +379,7 @@ static int symbol_width(struct hists *hists, struct sort_entry *se) static int c2c_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct hists *hists __maybe_unused) + struct hists *hists) { struct c2c_fmt *c2c_fmt; struct c2c_dimension *dim; @@ -1127,7 +1127,7 @@ MEAN_ENTRY(mean_lcl_entry, lcl_hitm); MEAN_ENTRY(mean_load_entry, load); static int -cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1141,7 +1141,7 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1155,7 +1155,7 @@ cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { int width = c2c_width(fmt, hpp, he->hists); @@ -1779,7 +1779,6 @@ static int c2c_hists__init(struct c2c_hists *hists, return hpp_list__parse(&hists->list, NULL, sort); } -__maybe_unused static int c2c_hists__reinit(struct c2c_hists *c2c_hists, const char *output, const char *sort) @@ -2658,7 +2657,7 @@ out: return err; } -static int parse_record_events(const struct option *opt __maybe_unused, +static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { bool *event_set = (bool *) opt->value; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d1ce29be560e..cd7bc4d104e2 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), - OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), + OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fa26865364b6..74d6a035133a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1687,6 +1687,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + /* Enable ignoring missing threads when -u option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d2afbe4a240d..06cc759a4597 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -648,7 +648,7 @@ report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, } static int -parse_branch_mode(const struct option *opt __maybe_unused, +parse_branch_mode(const struct option *opt, const char *str __maybe_unused, int unset) { int *branch_mode = opt->value; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 1a3f1be93372..d53e706a6f17 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -200,6 +200,7 @@ struct perf_sched { /* options for timehist command */ bool summary; bool summary_only; + bool idle_hist; bool show_callchain; unsigned int max_stack; bool show_cpu_visual; @@ -230,6 +231,15 @@ struct evsel_runtime { u32 ncpu; /* highest cpu slot allocated */ }; +/* per cpu idle time data */ +struct idle_thread_runtime { + struct thread_runtime tr; + struct thread *last_thread; + struct rb_root sorted_root; + struct callchain_root callchain; + struct callchain_cursor cursor; +}; + /* track idle times per cpu */ static struct thread **idle_threads; static int idle_max_cpu; @@ -1765,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) return r->last_time[cpu]; } -static int comm_width = 20; +static int comm_width = 30; static char *timehist_get_commstr(struct thread *thread) { @@ -1807,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - printf(" %-20s %9s %9s %9s", + printf(" %-*s %9s %9s %9s", comm_width, "task name", "wait time", "sch delay", "run time"); printf("\n"); @@ -1820,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); + printf(" %-*s %9s %9s %9s\n", comm_width, + "[tid/pid]", "(msec)", "(msec)", "(msec)"); /* * separator @@ -1830,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - printf(" %.20s %.9s %.9s %.9s", + printf(" %.*s %.9s %.9s %.9s", comm_width, graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line); @@ -1939,39 +1950,40 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_run_time += r->dt_run; } -static bool is_idle_sample(struct perf_sched *sched, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine) +static bool is_idle_sample(struct perf_sample *sample, + struct perf_evsel *evsel) { - struct thread *thread; - struct callchain_cursor *cursor = &callchain_cursor; - /* pid 0 == swapper == idle task */ - if (sample->pid == 0) - return true; + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) + return perf_evsel__intval(evsel, sample, "prev_pid") == 0; - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { - if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) - return true; - } + return sample->pid == 0; +} + +static void save_task_callchain(struct perf_sched *sched, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct callchain_cursor *cursor = &callchain_cursor; + struct thread *thread; /* want main thread for process - has maps */ thread = machine__findnew_thread(machine, sample->pid, sample->pid); if (thread == NULL) { pr_debug("Failed to get thread for pid %d.\n", sample->pid); - return false; + return; } if (!symbol_conf.use_callchain || sample->callchain == NULL) - return false; + return; if (thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, sched->max_stack + 2) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); - return false; + return; } callchain_cursor_commit(cursor); @@ -1994,8 +2006,24 @@ static bool is_idle_sample(struct perf_sched *sched, callchain_cursor_advance(cursor); } +} + +static int init_idle_thread(struct thread *thread) +{ + struct idle_thread_runtime *itr; + + thread__set_comm(thread, idle_comm, 0); + + itr = zalloc(sizeof(*itr)); + if (itr == NULL) + return -ENOMEM; - return false; + init_stats(&itr->tr.run_stats); + callchain_init(&itr->callchain); + callchain_cursor_reset(&itr->cursor); + thread__set_priv(thread, itr); + + return 0; } /* @@ -2004,7 +2032,7 @@ static bool is_idle_sample(struct perf_sched *sched, */ static int init_idle_threads(int ncpu) { - int i; + int i, ret; idle_threads = zalloc(ncpu * sizeof(struct thread *)); if (!idle_threads) @@ -2018,7 +2046,9 @@ static int init_idle_threads(int ncpu) if (idle_threads[i] == NULL) return -ENOMEM; - thread__set_comm(idle_threads[i], idle_comm, 0); + ret = init_idle_thread(idle_threads[i]); + if (ret < 0) + return ret; } return 0; @@ -2065,14 +2095,23 @@ static struct thread *get_idle_thread(int cpu) idle_threads[cpu] = thread__new(0, 0); if (idle_threads[cpu]) { - idle_threads[cpu]->tid = 0; - thread__set_comm(idle_threads[cpu], idle_comm, 0); + if (init_idle_thread(idle_threads[cpu]) < 0) + return NULL; } } return idle_threads[cpu]; } +static void save_idle_callchain(struct idle_thread_runtime *itr, + struct perf_sample *sample) +{ + if (!symbol_conf.use_callchain || sample->callchain == NULL) + return; + + callchain_cursor__copy(&itr->cursor, &callchain_cursor); +} + /* * handle runtime stats saved per thread */ @@ -2111,7 +2150,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, { struct thread *thread; - if (is_idle_sample(sched, sample, evsel, machine)) { + if (is_idle_sample(sample, evsel)) { thread = get_idle_thread(sample->cpu); if (thread == NULL) pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); @@ -2124,13 +2163,37 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, pr_debug("Failed to get thread for tid %d. skipping sample.\n", sample->tid); } + + save_task_callchain(sched, sample, evsel, machine); + if (sched->idle_hist) { + struct thread *idle; + struct idle_thread_runtime *itr; + + idle = get_idle_thread(sample->cpu); + if (idle == NULL) { + pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); + return NULL; + } + + itr = thread__priv(idle); + if (itr == NULL) + return NULL; + + itr->last_thread = thread; + + /* copy task callchain when entering to idle */ + if (perf_evsel__intval(evsel, sample, "next_pid") == 0) + save_idle_callchain(itr, sample); + } } return thread; } static bool timehist_skip_sample(struct perf_sched *sched, - struct thread *thread) + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample) { bool rc = false; @@ -2139,10 +2202,19 @@ static bool timehist_skip_sample(struct perf_sched *sched, sched->skipped_samples++; } + if (sched->idle_hist) { + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) + rc = true; + else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && + perf_evsel__intval(evsel, sample, "next_pid") != 0) + rc = true; + } + return rc; } static void timehist_print_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine, struct thread *awakened) @@ -2155,8 +2227,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, return; /* show wakeup unless both awakee and awaker are filtered */ - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, awakened)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, awakened, evsel, sample)) { return; } @@ -2201,7 +2273,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, /* show wakeups if requested */ if (sched->show_wakeups && !perf_time__skip_sample(&sched->ptime, sample->time)) - timehist_print_wakeup_event(sched, sample, machine, thread); + timehist_print_wakeup_event(sched, evsel, sample, machine, thread); return 0; } @@ -2228,8 +2300,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, if (thread == NULL) return; - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, migrated)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, migrated, evsel, sample)) { return; } @@ -2314,7 +2386,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, goto out; } - if (timehist_skip_sample(sched, thread)) + if (timehist_skip_sample(sched, thread, evsel, sample)) goto out; tr = thread__get_runtime(thread); @@ -2333,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, if (ptime->start && ptime->start > t) goto out; - if (ptime->start > tprev) + if (tprev && ptime->start > tprev) tprev = ptime->start; /* @@ -2350,7 +2422,39 @@ static int timehist_sched_change_event(struct perf_tool *tool, t = ptime->end; } - timehist_update_runtime_stats(tr, t, tprev); + if (!sched->idle_hist || thread->tid == 0) { + timehist_update_runtime_stats(tr, t, tprev); + + if (sched->idle_hist) { + struct idle_thread_runtime *itr = (void *)tr; + struct thread_runtime *last_tr; + + BUG_ON(thread->tid != 0); + + if (itr->last_thread == NULL) + goto out; + + /* add current idle time as last thread's runtime */ + last_tr = thread__get_runtime(itr->last_thread); + if (last_tr == NULL) + goto out; + + timehist_update_runtime_stats(last_tr, t, tprev); + /* + * remove delta time of last thread as it's not updated + * and otherwise it will show an invalid value next + * time. we only care total run time and run stat. + */ + last_tr->dt_run = 0; + last_tr->dt_wait = 0; + last_tr->dt_delay = 0; + + if (itr->cursor.nr) + callchain_append(&itr->callchain, &itr->cursor, t - tprev); + + itr->last_thread = NULL; + } + } if (!sched->summary_only) timehist_print_sample(sched, sample, &al, thread, t); @@ -2457,6 +2561,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv) return __show_thread_runtime(t, priv); } +static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) +{ + const char *sep = " <- "; + struct callchain_list *chain; + size_t ret = 0; + char bf[1024]; + bool first; + + if (node == NULL) + return 0; + + ret = callchain__fprintf_folded(fp, node->parent); + first = (ret == 0); + + list_for_each_entry(chain, &node->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym && chain->ms.sym->ignore) + continue; + ret += fprintf(fp, "%s%s", first ? "" : sep, + callchain_list__sym_name(chain, bf, sizeof(bf), + false)); + first = false; + } + + return ret; +} + +static size_t timehist_print_idlehist_callchain(struct rb_root *root) +{ + size_t ret = 0; + FILE *fp = stdout; + struct callchain_node *chain; + struct rb_node *rb_node = rb_first(root); + + printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); + printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, + graph_dotted_line); + + while (rb_node) { + chain = rb_entry(rb_node, struct callchain_node, rb_node); + rb_node = rb_next(rb_node); + + ret += fprintf(fp, " "); + print_sched_time(chain->hit, 12); + ret += 16; /* print_sched_time returns 2nd arg + 4 */ + ret += fprintf(fp, " %8d ", chain->count); + ret += callchain__fprintf_folded(fp, chain); + ret += fprintf(fp, "\n"); + } + + return ret; +} + static void timehist_print_summary(struct perf_sched *sched, struct perf_session *session) { @@ -2469,12 +2627,15 @@ static void timehist_print_summary(struct perf_sched *sched, memset(&totals, 0, sizeof(totals)); - if (comm_width < 30) - comm_width = 30; - - printf("\nRuntime summary\n"); - printf("%*s parent sched-in ", comm_width, "comm"); - printf(" run-time min-run avg-run max-run stddev migrations\n"); + if (sched->idle_hist) { + printf("\nIdle-time summary\n"); + printf("%*s parent sched-out ", comm_width, "comm"); + printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); + } else { + printf("\nRuntime summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time min-run avg-run max-run stddev migrations\n"); + } printf("%*s (count) ", comm_width, ""); printf(" (msec) (msec) (msec) (msec) %%\n"); printf("%.117s\n", graph_dotted_line); @@ -2490,7 +2651,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf("<no terminated tasks>\n"); /* CPU idle stats not tracked when samples were skipped */ - if (sched->skipped_samples) + if (sched->skipped_samples && !sched->idle_hist) return; printf("\nIdle stats:\n"); @@ -2509,6 +2670,35 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" CPU %2d idle entire time window\n", i); } + if (sched->idle_hist && symbol_conf.use_callchain) { + callchain_param.mode = CHAIN_FOLDED; + callchain_param.value = CCVAL_PERIOD; + + callchain_register_param(&callchain_param); + + printf("\nIdle stats by callchain:\n"); + for (i = 0; i < idle_max_cpu; ++i) { + struct idle_thread_runtime *itr; + + t = idle_threads[i]; + if (!t) + continue; + + itr = thread__priv(t); + if (itr == NULL) + continue; + + callchain_param.sort(&itr->sorted_root, &itr->callchain, + 0, &callchain_param); + + printf(" CPU %2d:", i); + print_sched_time(itr->tr.total_run_time, 6); + printf(" msec\n"); + timehist_print_idlehist_callchain(&itr->sorted_root); + printf("\n"); + } + } + printf("\n" " Total number of unique tasks: %" PRIu64 "\n" "Total number of context switches: %" PRIu64 "\n" @@ -3036,6 +3226,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), + OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), OPT_PARENT(sched_options) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 688dea7cb08f..a02f2e965628 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2195,7 +2195,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, } static -int process_stat_config_event(struct perf_tool *tool __maybe_unused, +int process_stat_config_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2238,7 +2238,7 @@ static int set_maps(struct perf_stat *st) } static -int process_thread_map_event(struct perf_tool *tool __maybe_unused, +int process_thread_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2257,7 +2257,7 @@ int process_thread_map_event(struct perf_tool *tool __maybe_unused, } static -int process_cpu_map_event(struct perf_tool *tool __maybe_unused, +int process_cpu_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh new file mode 100755 index 000000000000..c747bfd7f14d --- /dev/null +++ b/tools/perf/check-headers.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +HEADERS=' +include/uapi/linux/perf_event.h +include/linux/hash.h +include/uapi/linux/hw_breakpoint.h +arch/x86/include/asm/disabled-features.h +arch/x86/include/asm/required-features.h +arch/x86/include/asm/cpufeatures.h +arch/arm/include/uapi/asm/perf_regs.h +arch/arm64/include/uapi/asm/perf_regs.h +arch/powerpc/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/kvm.h +arch/x86/include/uapi/asm/kvm_perf.h +arch/x86/include/uapi/asm/svm.h +arch/x86/include/uapi/asm/vmx.h +arch/powerpc/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm_perf.h +arch/s390/include/uapi/asm/sie.h +arch/arm/include/uapi/asm/kvm.h +arch/arm64/include/uapi/asm/kvm.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h +include/asm-generic/bitops/__fls.h +include/asm-generic/bitops/fls.h +include/asm-generic/bitops/fls64.h +include/linux/coresight-pmu.h +include/uapi/asm-generic/mman-common.h +' + +check () { + file=$1 + opts= + + shift + while [ -n "$*" ]; do + opts="$opts \"$1\"" + shift + done + + cmd="diff $opts ../$file ../../$file > /dev/null" + + test -f ../../$file && + eval $cmd || echo "Warning: $file differs from kernel" >&2 +} + + +# simple diff check +for i in $HEADERS; do + check $i -B +done + +# diff with extra ignore lines +check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" +check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" +check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" +check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>" diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 9a0236a4cf95..1c27d947c2fe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -55,6 +55,7 @@ struct record_opts { bool all_user; bool tail_synthesize; bool overwrite; + bool ignore_missing_thread; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 23605202d4a1..a77dcc0d24e3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -186,6 +186,10 @@ static struct test generic_tests[] = { .func = test__thread_map_synthesize, }, { + .desc = "Remove thread map", + .func = test__thread_map_remove, + }, + { .desc = "Synthesize cpu map", .func = test__cpu_map_synthesize, }, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 0d7b251305af..a512f0c8ff5b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -80,6 +80,7 @@ const char *test__bpf_subtest_get_desc(int subtest); int test__bpf_subtest_get_nr(void); int test_session_topology(int subtest); int test__thread_map_synthesize(int subtest); +int test__thread_map_remove(int subtest); int test__cpu_map_synthesize(int subtest); int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index cee2a2cdc933..a4a4b4625ac3 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -1,3 +1,4 @@ +#include <stdlib.h> #include <sys/types.h> #include <unistd.h> #include <sys/prctl.h> @@ -93,3 +94,46 @@ int test__thread_map_synthesize(int subtest __maybe_unused) return 0; } + +int test__thread_map_remove(int subtest __maybe_unused) +{ + struct thread_map *threads; + char *str; + int i; + + TEST_ASSERT_VAL("failed to allocate map string", + asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); + + threads = thread_map__new_str(str, NULL, 0); + + TEST_ASSERT_VAL("failed to allocate thread_map", + threads); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to not remove thread", + thread_map__remove(threads, 0)); + + for (i = 0; i < threads->nr; i++) + free(threads->map[i].comm); + + free(threads); + return 0; +} diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index fd710ab33684..af1cfde6b97b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ec7a30fad149..ba36aac340bc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > (u64)dl->offset; + bool fwd = dl->ops.target.offset > dl->offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy { if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) || !disasm_line__has_offset(dl) - || dl->ops.target.offset >= symbol__size(sym)) + || dl->ops.target.offset < 0 + || dl->ops.target.offset >= (s64)symbol__size(sym)) return false; return true; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ea7e0de4b9c1..06cc04e5806a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -223,13 +223,19 @@ bool ins__is_call(const struct ins *ins) static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); + const char *c = strchr(ops->raw, ','); - ops->target.addr = strtoull(ops->raw, NULL, 16); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + else + ops->target.addr = strtoull(ops->raw, NULL, 16); - if (s++ != NULL) + if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); - else - ops->target.offset = UINT64_MAX; + ops->target.offset_avail = true; + } else { + ops->target.offset_avail = false; + } return 0; } @@ -237,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - if (!ops->target.addr) + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); @@ -641,7 +647,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr < sym->start || addr >= sym->end) { + if ((addr < sym->start || addr >= sym->end) && + (addr != sym->end || sym->start != sym->end)) { pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; @@ -1205,9 +1212,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (dl == NULL) return -1; - if (dl->ops.target.offset == UINT64_MAX) + if (!disasm_line__has_offset(dl)) { dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); + dl->ops.target.offset_avail = true; + } /* kcore has no symbols, so add the call target name */ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 87e4cadc5d27..09776b5af991 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -24,7 +24,8 @@ struct ins_operands { char *raw; char *name; u64 addr; - u64 offset; + s64 offset; + bool offset_avail; } target; union { struct { @@ -68,7 +69,7 @@ struct disasm_line { static inline bool disasm_line__has_offset(const struct disasm_line *dl) { - return dl->ops.target.offset != UINT64_MAX; + return dl->ops.target.offset_avail; } void disasm_line__free(struct disasm_line *dl); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b2365a63db45..04e536ae4d88 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -990,6 +990,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * it overloads any global configuration. */ apply_config_terms(evsel, opts); + + evsel->ignore_missing_thread = opts->ignore_missing_thread; } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -1419,6 +1421,33 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static bool ignore_missing_thread(struct perf_evsel *evsel, + struct thread_map *threads, + int thread, int err) +{ + if (!evsel->ignore_missing_thread) + return false; + + /* The system wide setup does not work with threads. */ + if (evsel->system_wide) + return false; + + /* The -ESRCH is perf event syscall errno for pid's not found. */ + if (err != -ESRCH) + return false; + + /* If there's only one thread, let it fail. */ + if (threads->nr == 1) + return false; + + if (thread_map__remove(threads, thread)) + return false; + + pr_warning("WARNING: Ignored open failure for pid %d\n", + thread_map__pid(threads, thread)); + return true; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1474,7 +1503,7 @@ retry_sample_id: for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { - int group_fd; + int fd, group_fd; if (!evsel->cgrp && !evsel->system_wide) pid = thread_map__pid(threads, thread); @@ -1484,21 +1513,37 @@ retry_open: pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); - FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, - pid, - cpus->map[cpu], - group_fd, flags); - if (FD(evsel, cpu, thread) < 0) { + fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], + group_fd, flags); + + FD(evsel, cpu, thread) = fd; + + if (fd < 0) { err = -errno; + + if (ignore_missing_thread(evsel, threads, thread, err)) { + /* + * We just removed 1 thread, so take a step + * back on thread index and lower the upper + * nthreads limit. + */ + nthreads--; + thread--; + + /* ... and pretend like nothing have happened. */ + err = 0; + continue; + } + pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } - pr_debug2(" = %d\n", FD(evsel, cpu, thread)); + pr_debug2(" = %d\n", fd); if (evsel->bpf_fd >= 0) { - int evt_fd = FD(evsel, cpu, thread); + int evt_fd = fd; int bpf_fd = evsel->bpf_fd; err = ioctl(evt_fd, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6abb89cd27f9..06ef6f29efa1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,7 @@ struct perf_evsel { bool tracking; bool per_pkg; bool precise_max; + bool ignore_missing_thread; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index df2482b2ba45..dc93940de351 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(dso->long_name) && + if (!dso->has_build_id && + is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 40585f5b7027..f9eab200fd75 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -448,3 +448,25 @@ bool thread_map__has(struct thread_map *threads, pid_t pid) return false; } + +int thread_map__remove(struct thread_map *threads, int idx) +{ + int i; + + if (threads->nr < 1) + return -EINVAL; + + if (idx >= threads->nr) + return -EINVAL; + + /* + * Free the 'idx' item and shift the rest up. + */ + free(threads->map[idx].comm); + + for (i = idx; i < threads->nr - 1; i++) + threads->map[i] = threads->map[i + 1]; + + threads->nr--; + return 0; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index bd3b971588da..ea0ef08c6303 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -58,4 +58,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) void thread_map__read_comms(struct thread_map *threads); bool thread_map__has(struct thread_map *threads, pid_t pid); +int thread_map__remove(struct thread_map *threads, int idx); #endif /* __PERF_THREAD_MAP_H */ |