diff options
Diffstat (limited to 'tools')
666 files changed, 25530 insertions, 6864 deletions
diff --git a/tools/Makefile b/tools/Makefile index 7e9d34ddd74c..5da1fde03a9a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -12,6 +12,7 @@ help: @echo ' acpi - ACPI tools' @echo ' bpf - misc BPF tools' @echo ' cgroup - cgroup tools' + @echo ' counter - counter tools' @echo ' cpupower - a tool for all things x86 CPU power' @echo ' debugging - tools for debugging' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @@ -65,7 +66,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE +cgroup counter firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE @@ -100,7 +101,7 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) -all: acpi cgroup cpupower gpio hv firewire liblockdep \ +all: acpi cgroup counter cpupower gpio hv firewire liblockdep \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ @@ -112,7 +113,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: +cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -133,7 +134,7 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) -install: acpi_install cgroup_install cpupower_install gpio_install \ +install: acpi_install cgroup_install counter_install cpupower_install gpio_install \ hv_install firewire_install iio_install liblockdep_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ @@ -147,7 +148,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: +cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -181,7 +182,7 @@ freefall_clean: build_clean: $(call descend,build,clean) -clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ +clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index 578b3ee86105..749a2e3af89e 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -61,27 +61,35 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_PMC4, PERF_REG_POWERPC_PMC5, PERF_REG_POWERPC_PMC6, - /* Max regs without the extended regs */ + PERF_REG_POWERPC_SDAR, + PERF_REG_POWERPC_SIAR, + /* Max mask value for interrupt regs w/o extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, + /* Max mask value for interrupt regs including extended regs */ + PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ -#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) - /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 - * includes 9 SPRS from MMCR0 to PMC6 excluding the - * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + * includes 11 SPRS from MMCR0 to SIAR excluding the + * unsupported SPRS MMCR3, SIER2 and SIER3. */ -#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) +#define PERF_REG_PMU_MASK_300 \ + ((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \ + (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \ + (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \ + (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \ + (1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \ + (1ULL << PERF_REG_POWERPC_SIAR)) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 - * includes 12 SPRs from MMCR0 to PMC6. + * includes 14 SPRs from MMCR0 to SIAR. */ -#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) +#define PERF_REG_PMU_MASK_31 \ + (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \ + (1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3)) -#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 797699462cd8..8fd63a067308 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -13,6 +13,7 @@ #endif #include "../include/asm/inat.h" /* __ignore_sync_check__ */ #include "../include/asm/insn.h" /* __ignore_sync_check__ */ +#include "../include/asm-generic/unaligned.h" /* __ignore_sync_check__ */ #include <linux/errno.h> #include <linux/kconfig.h> @@ -37,10 +38,10 @@ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) + ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); }) + ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile index da5975775337..566c3e0ee561 100644 --- a/tools/bootconfig/Makefile +++ b/tools/bootconfig/Makefile @@ -15,9 +15,9 @@ CFLAGS = -Wall -g -I$(CURDIR)/include ALL_TARGETS := bootconfig ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) -all: $(ALL_PROGRAMS) +all: $(ALL_PROGRAMS) test -$(OUTPUT)bootconfig: main.c $(LIBSRC) +$(OUTPUT)bootconfig: main.c include/linux/bootconfig.h $(LIBSRC) $(CC) $(filter %.c,$^) $(CFLAGS) -o $@ test: $(ALL_PROGRAMS) test-bootconfig.sh diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h index de7f30f99af3..6784296a0692 100644 --- a/tools/bootconfig/include/linux/bootconfig.h +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -2,10 +2,53 @@ #ifndef _BOOTCONFIG_LINUX_BOOTCONFIG_H #define _BOOTCONFIG_LINUX_BOOTCONFIG_H -#include "../../../../include/linux/bootconfig.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <ctype.h> +#include <errno.h> +#include <string.h> + #ifndef fallthrough # define fallthrough #endif +#define WARN_ON(cond) \ + ((cond) ? printf("Internal warning(%s:%d, %s): %s\n", \ + __FILE__, __LINE__, __func__, #cond) : 0) + +#define unlikely(cond) (cond) + +/* Copied from lib/string.c */ +static inline char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +static inline char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} + +#define __init +#define __initdata + +#include "../../../../include/linux/bootconfig.h" + #endif diff --git a/tools/bootconfig/include/linux/bug.h b/tools/bootconfig/include/linux/bug.h deleted file mode 100644 index 7b65a389c0dd..000000000000 --- a/tools/bootconfig/include/linux/bug.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_BUG_H -#define _SKC_LINUX_BUG_H - -#include <stdio.h> -#include <stdlib.h> - -#define WARN_ON(cond) \ - ((cond) ? printf("Internal warning(%s:%d, %s): %s\n", \ - __FILE__, __LINE__, __func__, #cond) : 0) - -#endif diff --git a/tools/bootconfig/include/linux/ctype.h b/tools/bootconfig/include/linux/ctype.h deleted file mode 100644 index c56ecc136448..000000000000 --- a/tools/bootconfig/include/linux/ctype.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_CTYPE_H -#define _SKC_LINUX_CTYPE_H - -#include <ctype.h> - -#endif diff --git a/tools/bootconfig/include/linux/errno.h b/tools/bootconfig/include/linux/errno.h deleted file mode 100644 index 5d9f91ec2fda..000000000000 --- a/tools/bootconfig/include/linux/errno.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_ERRNO_H -#define _SKC_LINUX_ERRNO_H - -#include <asm/errno.h> - -#endif diff --git a/tools/bootconfig/include/linux/kernel.h b/tools/bootconfig/include/linux/kernel.h deleted file mode 100644 index 2d93320aa374..000000000000 --- a/tools/bootconfig/include/linux/kernel.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_KERNEL_H -#define _SKC_LINUX_KERNEL_H - -#include <stdlib.h> -#include <stdbool.h> - -#include <linux/printk.h> - -typedef unsigned short u16; -typedef unsigned int u32; - -#define unlikely(cond) (cond) - -#define __init -#define __initdata - -#endif diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h deleted file mode 100644 index f2e506f7d57f..000000000000 --- a/tools/bootconfig/include/linux/memblock.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _XBC_LINUX_MEMBLOCK_H -#define _XBC_LINUX_MEMBLOCK_H - -#include <stdlib.h> - -#define SMP_CACHE_BYTES 0 -#define memblock_alloc(size, align) malloc(size) -#define memblock_free_ptr(paddr, size) free(paddr) - -#endif diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h deleted file mode 100644 index 036e667596eb..000000000000 --- a/tools/bootconfig/include/linux/printk.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_PRINTK_H -#define _SKC_LINUX_PRINTK_H - -#include <stdio.h> - -#define printk(fmt, ...) printf(fmt, ##__VA_ARGS__) - -#define pr_err printk -#define pr_warn printk -#define pr_info printk -#define pr_debug printk - -#endif diff --git a/tools/bootconfig/include/linux/string.h b/tools/bootconfig/include/linux/string.h deleted file mode 100644 index 8267af75153a..000000000000 --- a/tools/bootconfig/include/linux/string.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_STRING_H -#define _SKC_LINUX_STRING_H - -#include <string.h> - -/* Copied from lib/string.c */ -static inline char *skip_spaces(const char *str) -{ - while (isspace(*str)) - ++str; - return (char *)str; -} - -static inline char *strim(char *s) -{ - size_t size; - char *end; - - size = strlen(s); - if (!size) - return s; - - end = s + size - 1; - while (end >= s && isspace(*end)) - end--; - *(end + 1) = '\0'; - - return skip_spaces(s); -} - -#endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index fd67496a947f..156b62a163c5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -12,9 +12,10 @@ #include <errno.h> #include <endian.h> -#include <linux/kernel.h> #include <linux/bootconfig.h> +#define pr_err(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) + static int xbc_show_value(struct xbc_node *node, bool semicolon) { const char *val, *eol; @@ -176,7 +177,7 @@ static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; int ret; - u32 size = 0, csum = 0, rcsum; + uint32_t size = 0, csum = 0, rcsum; char magic[BOOTCONFIG_MAGIC_LEN]; const char *msg; @@ -200,11 +201,11 @@ static int load_xbc_from_initrd(int fd, char **buf) if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); - if (read(fd, &size, sizeof(u32)) < 0) + if (read(fd, &size, sizeof(uint32_t)) < 0) return pr_errno("Failed to read size", -errno); size = le32toh(size); - if (read(fd, &csum, sizeof(u32)) < 0) + if (read(fd, &csum, sizeof(uint32_t)) < 0) return pr_errno("Failed to read checksum", -errno); csum = le32toh(csum); @@ -229,7 +230,7 @@ static int load_xbc_from_initrd(int fd, char **buf) return -EINVAL; } - ret = xbc_init(*buf, &msg, NULL); + ret = xbc_init(*buf, size, &msg, NULL); /* Wrong data */ if (ret < 0) { pr_err("parse error: %s.\n", msg); @@ -269,7 +270,7 @@ static int init_xbc_with_error(char *buf, int len) if (!copy) return -ENOMEM; - ret = xbc_init(buf, &msg, &pos); + ret = xbc_init(buf, len, &msg, &pos); if (ret < 0) show_xbc_error(copy, msg, pos); free(copy); @@ -362,7 +363,7 @@ static int apply_xbc(const char *path, const char *xbc_path) size_t total_size; struct stat stat; const char *msg; - u32 size, csum; + uint32_t size, csum; int pos, pad; int ret, fd; @@ -376,13 +377,13 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Backup the bootconfig data */ data = calloc(size + BOOTCONFIG_ALIGN + - sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1); + sizeof(uint32_t) + sizeof(uint32_t) + BOOTCONFIG_MAGIC_LEN, 1); if (!data) return -ENOMEM; memcpy(data, buf, size); /* Check the data format */ - ret = xbc_init(buf, &msg, &pos); + ret = xbc_init(buf, size, &msg, &pos); if (ret < 0) { show_xbc_error(data, msg, pos); free(data); @@ -391,12 +392,13 @@ static int apply_xbc(const char *path, const char *xbc_path) return ret; } printf("Apply %s to %s\n", xbc_path, path); + xbc_get_info(&ret, NULL); printf("\tNumber of nodes: %d\n", ret); printf("\tSize: %u bytes\n", (unsigned int)size); printf("\tChecksum: %d\n", (unsigned int)csum); /* TODO: Check the options by schema */ - xbc_destroy_all(); + xbc_exit(); free(buf); /* Remove old boot config if exists */ @@ -423,17 +425,17 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ - total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN; + total_size = stat.st_size + size + sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN; pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; size += pad; /* Add a footer */ p = data + size; - *(u32 *)p = htole32(size); - p += sizeof(u32); + *(uint32_t *)p = htole32(size); + p += sizeof(uint32_t); - *(u32 *)p = htole32(csum); - p += sizeof(u32); + *(uint32_t *)p = htole32(csum); + p += sizeof(uint32_t); memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); p += BOOTCONFIG_MAGIC_LEN; diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index d73232be1e99..c0c30e56988f 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -14,33 +14,43 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) - LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ - LIBBPF_PATH = $(LIBBPF_OUTPUT) - BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/ + _OUTPUT := $(OUTPUT) else - LIBBPF_OUTPUT = - LIBBPF_PATH = $(BPF_DIR) - BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/ + _OUTPUT := $(CURDIR) endif +BOOTSTRAP_OUTPUT := $(_OUTPUT)/bootstrap/ +LIBBPF_OUTPUT := $(_OUTPUT)/libbpf/ +LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include +LIBBPF_HDRS_DIR := $(LIBBPF_INCLUDE)/bpf -LIBBPF = $(LIBBPF_PATH)libbpf.a +LIBBPF = $(LIBBPF_OUTPUT)libbpf.a LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a +# We need to copy hashmap.h and nlattr.h which is not otherwise exported by +# libbpf, but still required by bpftool. +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) + ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) endif -$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR): $(QUIET_MKDIR)mkdir -p $@ -$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) - $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a +$(LIBBPF): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers + +$(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_DIR) + $(call QUIET_INSTALL, $@) + $(Q)install -m 644 -t $(LIBBPF_HDRS_DIR) $< -$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) +$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ @@ -60,11 +70,10 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(if $(OUTPUT),$(OUTPUT),.) \ + -I$(LIBBPF_INCLUDE) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ - -I$(srctree)/tools/include/uapi \ - -I$(srctree)/tools/lib \ - -I$(srctree)/tools/perf + -I$(srctree)/tools/include/uapi CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) @@ -137,7 +146,10 @@ endif BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) +$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP) + OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +$(OBJS): $(LIBBPF) $(LIBBPF_INTERNAL_HDRS) VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -164,8 +176,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_PATH) \ - -I$(srctree)/tools/lib \ + -I$(LIBBPF_INCLUDE) \ -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) @@ -186,7 +197,10 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -$(OUTPUT)feature.o: | zdep +$(OUTPUT)feature.o: +ifneq ($(feature-zlib), 1) + $(error "No zlib found") +endif $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ @@ -195,7 +209,7 @@ $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) -$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT) +$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)%.o: %.c @@ -214,10 +228,12 @@ clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool $(Q)$(RM) -r -- $(OUTPUT)feature/ -install: $(OUTPUT)bpftool +install-bin: $(OUTPUT)bpftool $(call QUIET_INSTALL, bpftool) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + +install: install-bin $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) @@ -240,10 +256,7 @@ doc-uninstall: FORCE: -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - .SECONDARY: -.PHONY: all FORCE clean install uninstall zdep +.PHONY: all FORCE clean install-bin install uninstall .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index f7e5ff3586c9..015d2758f826 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -8,14 +8,15 @@ #include <stdio.h> #include <string.h> #include <unistd.h> -#include <bpf/bpf.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> #include <linux/btf.h> -#include <linux/hashtable.h> #include <sys/types.h> #include <sys/stat.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/hashmap.h> +#include <bpf/libbpf.h> + #include "json_writer.h" #include "main.h" @@ -37,16 +38,12 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", -}; - -struct btf_attach_table { - DECLARE_HASHTABLE(table, 16); + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; struct btf_attach_point { __u32 obj_id; __u32 btf_id; - struct hlist_node hash; }; static const char *btf_int_enc_str(__u8 encoding) @@ -328,7 +325,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf("\n\ttype_id=%u offset=%u size=%u", v->type, v->offset, v->size); - if (v->type <= btf__get_nr_types(btf)) { + if (v->type < btf__type_cnt(btf)) { vt = btf__type_by_id(btf, v->type); printf(" (%s '%s')", btf_kind_str[btf_kind_safe(btf_kind(vt))], @@ -347,6 +344,17 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf(" size=%u", t->size); break; } + case BTF_KIND_DECL_TAG: { + const struct btf_decl_tag *tag = (const void *)(t + 1); + + if (json_output) { + jsonw_uint_field(w, "type_id", t->type); + jsonw_int_field(w, "component_idx", tag->component_idx); + } else { + printf(" type_id=%u component_idx=%d", t->type, tag->component_idx); + } + break; + } default: break; } @@ -378,14 +386,14 @@ static int dump_btf_raw(const struct btf *btf, } } else { const struct btf *base; - int cnt = btf__get_nr_types(btf); + int cnt = btf__type_cnt(btf); int start_id = 1; base = btf__base_btf(btf); if (base) - start_id = btf__get_nr_types(base) + 1; + start_id = btf__type_cnt(base); - for (i = start_id; i <= cnt; i++) { + for (i = start_id; i < cnt; i++) { t = btf__type_by_id(btf, i); dump_btf_type(btf, i, t); } @@ -428,9 +436,9 @@ static int dump_btf_c(const struct btf *btf, goto done; } } else { - int cnt = btf__get_nr_types(btf); + int cnt = btf__type_cnt(btf); - for (i = 1; i <= cnt; i++) { + for (i = 1; i < cnt; i++) { err = btf_dump__dump_type(d, i); if (err) goto done; @@ -633,21 +641,8 @@ static int btf_parse_fd(int *argc, char ***argv) return fd; } -static void delete_btf_table(struct btf_attach_table *tab) -{ - struct btf_attach_point *obj; - struct hlist_node *tmp; - - unsigned int bkt; - - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - hash_del(&obj->hash); - free(obj); - } -} - static int -build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, +build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, void *info, __u32 *len) { static const char * const names[] = { @@ -655,7 +650,6 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; - struct btf_attach_point *obj_node; __u32 btf_id, id = 0; int err; int fd; @@ -729,28 +723,25 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, if (!btf_id) continue; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { - p_err("failed to allocate memory: %s", strerror(errno)); - err = -ENOMEM; + err = hashmap__append(tab, u32_as_hash_field(btf_id), + u32_as_hash_field(id)); + if (err) { + p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", + btf_id, id, strerror(errno)); goto err_free; } - - obj_node->obj_id = id; - obj_node->btf_id = btf_id; - hash_add(tab->table, &obj_node->hash, obj_node->btf_id); } return 0; err_free: - delete_btf_table(tab); + hashmap__free(tab); return err; } static int -build_btf_tables(struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) +build_btf_tables(struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { struct bpf_prog_info prog_info; __u32 prog_len = sizeof(prog_info); @@ -766,7 +757,7 @@ build_btf_tables(struct btf_attach_table *btf_prog_table, err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info, &map_len); if (err) { - delete_btf_table(btf_prog_table); + hashmap__free(btf_prog_table); return err; } @@ -775,10 +766,10 @@ build_btf_tables(struct btf_attach_table *btf_prog_table, static void show_btf_plain(struct bpf_btf_info *info, int fd, - struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) + struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { - struct btf_attach_point *obj; + struct hashmap_entry *entry; const char *name = u64_to_ptr(info->name); int n; @@ -792,29 +783,30 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("size %uB", info->btf_size); n = 0; - hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) { - if (obj->btf_id == info->id) - printf("%s%u", n++ == 0 ? " prog_ids " : ",", - obj->obj_id); + hashmap__for_each_key_entry(btf_prog_table, entry, + u32_as_hash_field(info->id)) { + printf("%s%u", n++ == 0 ? " prog_ids " : ",", + hash_field_as_u32(entry->value)); } n = 0; - hash_for_each_possible(btf_map_table->table, obj, hash, info->id) { - if (obj->btf_id == info->id) - printf("%s%u", n++ == 0 ? " map_ids " : ",", - obj->obj_id); + hashmap__for_each_key_entry(btf_map_table, entry, + u32_as_hash_field(info->id)) { + printf("%s%u", n++ == 0 ? " map_ids " : ",", + hash_field_as_u32(entry->value)); } - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); } static void show_btf_json(struct bpf_btf_info *info, int fd, - struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) + struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { - struct btf_attach_point *obj; + struct hashmap_entry *entry; const char *name = u64_to_ptr(info->name); jsonw_start_object(json_wtr); /* btf object */ @@ -823,23 +815,21 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_name(json_wtr, "prog_ids"); jsonw_start_array(json_wtr); /* prog_ids */ - hash_for_each_possible(btf_prog_table->table, obj, hash, - info->id) { - if (obj->btf_id == info->id) - jsonw_uint(json_wtr, obj->obj_id); + hashmap__for_each_key_entry(btf_prog_table, entry, + u32_as_hash_field(info->id)) { + jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); } jsonw_end_array(json_wtr); /* prog_ids */ jsonw_name(json_wtr, "map_ids"); jsonw_start_array(json_wtr); /* map_ids */ - hash_for_each_possible(btf_map_table->table, obj, hash, - info->id) { - if (obj->btf_id == info->id) - jsonw_uint(json_wtr, obj->obj_id); + hashmap__for_each_key_entry(btf_map_table, entry, + u32_as_hash_field(info->id)) { + jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); } jsonw_end_array(json_wtr); /* map_ids */ - emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + emit_obj_refs_json(refs_table, info->id, json_wtr); /* pids */ jsonw_bool_field(json_wtr, "kernel", info->kernel_btf); @@ -850,8 +840,8 @@ show_btf_json(struct bpf_btf_info *info, int fd, } static int -show_btf(int fd, struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) +show_btf(int fd, struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { struct bpf_btf_info info; __u32 len = sizeof(info); @@ -888,8 +878,8 @@ show_btf(int fd, struct btf_attach_table *btf_prog_table, static int do_show(int argc, char **argv) { - struct btf_attach_table btf_prog_table; - struct btf_attach_table btf_map_table; + struct hashmap *btf_prog_table; + struct hashmap *btf_map_table; int err, fd = -1; __u32 id = 0; @@ -905,9 +895,19 @@ static int do_show(int argc, char **argv) return BAD_ARG(); } - hash_init(btf_prog_table.table); - hash_init(btf_map_table.table); - err = build_btf_tables(&btf_prog_table, &btf_map_table); + btf_prog_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + btf_map_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!btf_prog_table || !btf_map_table) { + hashmap__free(btf_prog_table); + hashmap__free(btf_map_table); + if (fd >= 0) + close(fd); + p_err("failed to create hashmap for object references"); + return -1; + } + err = build_btf_tables(btf_prog_table, btf_map_table); if (err) { if (fd >= 0) close(fd); @@ -916,7 +916,7 @@ static int do_show(int argc, char **argv) build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { - err = show_btf(fd, &btf_prog_table, &btf_map_table); + err = show_btf(fd, btf_prog_table, btf_map_table); close(fd); goto exit_free; } @@ -948,7 +948,7 @@ static int do_show(int argc, char **argv) break; } - err = show_btf(fd, &btf_prog_table, &btf_map_table); + err = show_btf(fd, btf_prog_table, btf_map_table); close(fd); if (err) break; @@ -958,9 +958,9 @@ static int do_show(int argc, char **argv) jsonw_end_array(json_wtr); /* root array */ exit_free: - delete_btf_table(&btf_prog_table); - delete_btf_table(&btf_map_table); - delete_obj_refs_table(&refs_table); + hashmap__free(btf_prog_table); + hashmap__free(btf_map_table); + delete_obj_refs_table(refs_table); return err; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index d42d930a3ec4..511eccdbdfe6 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -22,6 +22,7 @@ #include <sys/vfs.h> #include <bpf/bpf.h> +#include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ #include "main.h" @@ -393,7 +394,7 @@ void print_hex_data_json(uint8_t *data, size_t len) } /* extra params for nftw cb */ -static struct pinned_obj_table *build_fn_table; +static struct hashmap *build_fn_table; static enum bpf_obj_type build_fn_type; static int do_build_table_cb(const char *fpath, const struct stat *sb, @@ -401,9 +402,9 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, { struct bpf_prog_info pinned_info; __u32 len = sizeof(pinned_info); - struct pinned_obj *obj_node; enum bpf_obj_type objtype; int fd, err = 0; + char *path; if (typeflag != FTW_F) goto out_ret; @@ -420,28 +421,26 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) goto out_close; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { + path = strdup(fpath); + if (!path) { err = -1; goto out_close; } - obj_node->id = pinned_info.id; - obj_node->path = strdup(fpath); - if (!obj_node->path) { - err = -1; - free(obj_node); + err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path); + if (err) { + p_err("failed to append entry to hashmap for ID %u, path '%s': %s", + pinned_info.id, path, strerror(errno)); goto out_close; } - hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); out_close: close(fd); out_ret: return err; } -int build_pinned_obj_table(struct pinned_obj_table *tab, +int build_pinned_obj_table(struct hashmap *tab, enum bpf_obj_type type) { struct mntent *mntent = NULL; @@ -470,17 +469,18 @@ int build_pinned_obj_table(struct pinned_obj_table *tab, return err; } -void delete_pinned_obj_table(struct pinned_obj_table *tab) +void delete_pinned_obj_table(struct hashmap *map) { - struct pinned_obj *obj; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - hash_del(&obj->hash); - free(obj->path); - free(obj); - } + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) + free(entry->value); + + hashmap__free(map); } unsigned int get_page_size(void) @@ -962,3 +962,13 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } + +size_t hash_fn_for_key_as_id(const void *key, void *ctx) +{ + return (size_t)key; +} + +bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 7f36385aa9e2..ade44577688e 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -624,6 +624,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, */ switch (id) { case BPF_FUNC_trace_printk: + case BPF_FUNC_trace_vprintk: case BPF_FUNC_probe_write_user: if (!full_mode) continue; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index d40d92bbf0e4..5c18351290f0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -18,7 +18,6 @@ #include <sys/stat.h> #include <sys/mman.h> #include <bpf/btf.h> -#include <bpf/bpf_gen_internal.h> #include "json_writer.h" #include "main.h" @@ -34,6 +33,11 @@ static void sanitize_identifier(char *name) name[i] = '_'; } +static bool str_has_prefix(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} + static bool str_has_suffix(const char *str, const char *suffix) { size_t i, n1 = strlen(str), n2 = strlen(suffix); @@ -68,23 +72,47 @@ static void get_header_guard(char *guard, const char *obj_name) guard[i] = toupper(guard[i]); } -static const char *get_map_ident(const struct bpf_map *map) +static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz) { + static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; const char *name = bpf_map__name(map); + int i, n; + + if (!bpf_map__is_internal(map)) { + snprintf(buf, buf_sz, "%s", name); + return true; + } + + for (i = 0, n = ARRAY_SIZE(sfxs); i < n; i++) { + const char *sfx = sfxs[i], *p; + + p = strstr(name, sfx); + if (p) { + snprintf(buf, buf_sz, "%s", p + 1); + sanitize_identifier(buf); + return true; + } + } + + return false; +} + +static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) +{ + static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; + int i, n; + + for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { + const char *pfx = pfxs[i]; + + if (str_has_prefix(sec_name, pfx)) { + snprintf(buf, buf_sz, "%s", sec_name + 1); + sanitize_identifier(buf); + return true; + } + } - if (!bpf_map__is_internal(map)) - return name; - - if (str_has_suffix(name, ".data")) - return "data"; - else if (str_has_suffix(name, ".rodata")) - return "rodata"; - else if (str_has_suffix(name, ".bss")) - return "bss"; - else if (str_has_suffix(name, ".kconfig")) - return "kconfig"; - else - return NULL; + return false; } static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args) @@ -101,24 +129,14 @@ static int codegen_datasec_def(struct bpf_object *obj, const char *sec_name = btf__name_by_offset(btf, sec->name_off); const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); - const char *sec_ident; - char var_ident[256]; + char var_ident[256], sec_ident[256]; bool strip_mods = false; - if (strcmp(sec_name, ".data") == 0) { - sec_ident = "data"; - strip_mods = true; - } else if (strcmp(sec_name, ".bss") == 0) { - sec_ident = "bss"; - strip_mods = true; - } else if (strcmp(sec_name, ".rodata") == 0) { - sec_ident = "rodata"; - strip_mods = true; - } else if (strcmp(sec_name, ".kconfig") == 0) { - sec_ident = "kconfig"; - } else { + if (!get_datasec_ident(sec_name, sec_ident, sizeof(sec_ident))) return 0; - } + + if (strcmp(sec_name, ".kconfig") != 0) + strip_mods = true; printf(" struct %s__%s {\n", obj_name, sec_ident); for (i = 0; i < vlen; i++, sec_var++) { @@ -193,24 +211,63 @@ static int codegen_datasec_def(struct bpf_object *obj, static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) { struct btf *btf = bpf_object__btf(obj); - int n = btf__get_nr_types(btf); + int n = btf__type_cnt(btf); struct btf_dump *d; + struct bpf_map *map; + const struct btf_type *sec; + char sec_ident[256], map_ident[256]; int i, err = 0; d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); if (IS_ERR(d)) return PTR_ERR(d); - for (i = 1; i <= n; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); + bpf_object__for_each_map(map, obj) { + /* only generate definitions for memory-mapped internal maps */ + if (!bpf_map__is_internal(map)) + continue; + if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + continue; - if (!btf_is_datasec(t)) + if (!get_map_ident(map, map_ident, sizeof(map_ident))) continue; - err = codegen_datasec_def(obj, btf, d, t, obj_name); - if (err) - goto out; + sec = NULL; + for (i = 1; i < n; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + const char *name; + + if (!btf_is_datasec(t)) + continue; + + name = btf__str_by_offset(btf, t->name_off); + if (!get_datasec_ident(name, sec_ident, sizeof(sec_ident))) + continue; + + if (strcmp(sec_ident, map_ident) == 0) { + sec = t; + break; + } + } + + /* In some cases (e.g., sections like .rodata.cst16 containing + * compiler allocated string constants only) there will be + * special internal maps with no corresponding DATASEC BTF + * type. In such case, generate empty structs for each such + * map. It will still be memory-mapped and its contents + * accessible from user-space through BPF skeleton. + */ + if (!sec) { + printf(" struct %s__%s {\n", obj_name, map_ident); + printf(" } *%s;\n", map_ident); + } else { + err = codegen_datasec_def(obj, btf, d, sec, obj_name); + if (err) + goto out; + } } + + out: btf_dump__free(d); return err; @@ -238,8 +295,8 @@ static void codegen(const char *template, ...) } else if (c == '\n') { break; } else { - p_err("unrecognized character at pos %td in template '%s'", - src - template - 1, template); + p_err("unrecognized character at pos %td in template '%s': '%c'", + src - template - 1, template, c); free(s); exit(-1); } @@ -386,6 +443,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) { struct bpf_program *prog; struct bpf_map *map; + char ident[256]; codegen("\ \n\ @@ -406,10 +464,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) } bpf_object__for_each_map(map, obj) { - const char * ident; - - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (bpf_map__is_internal(map) && (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) @@ -433,6 +488,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h struct bpf_object_load_attr load_attr = {}; DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); struct bpf_map *map; + char ident[256]; int err = 0; err = bpf_object__gen_loader(obj, &opts); @@ -478,12 +534,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h ", obj_name, opts.data_sz); bpf_object__for_each_map(map, obj) { - const char *ident; const void *mmap_data = NULL; size_t mmap_size = 0; - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (!bpf_map__is_internal(map) || @@ -545,15 +599,15 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h return err; \n\ ", obj_name); bpf_object__for_each_map(map, obj) { - const char *ident, *mmap_flags; + const char *mmap_flags; - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (!bpf_map__is_internal(map) || !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) continue; + if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) mmap_flags = "PROT_READ"; else @@ -603,7 +657,8 @@ static int do_skeleton(int argc, char **argv) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; - const char *file, *ident; + const char *file; + char ident[256]; struct bpf_program *prog; int fd, err = -1; struct bpf_map *map; @@ -674,8 +729,7 @@ static int do_skeleton(int argc, char **argv) } bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - if (!ident) { + if (!get_map_ident(map, ident, sizeof(ident))) { p_err("ignoring unrecognized internal map '%s'...", bpf_map__name(map)); continue; @@ -728,8 +782,7 @@ static int do_skeleton(int argc, char **argv) if (map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (use_loader) printf("\t\tstruct bpf_map_desc %s;\n", ident); @@ -803,7 +856,10 @@ static int do_skeleton(int argc, char **argv) } \n\ \n\ err = %1$s__create_skeleton(obj); \n\ - err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\ + if (err) \n\ + goto err_out; \n\ + \n\ + err = bpf_object__open_skeleton(obj->skeleton, opts);\n\ if (err) \n\ goto err_out; \n\ \n\ @@ -862,6 +918,8 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ + static inline const void *%1$s__elf_bytes(size_t *sz); \n\ + \n\ static inline int \n\ %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ @@ -893,9 +951,7 @@ static int do_skeleton(int argc, char **argv) ); i = 0; bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; codegen("\ @@ -943,10 +999,20 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ - s->data_sz = %d; \n\ - s->data = (void *)\"\\ \n\ - ", - file_sz); + s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ + \n\ + return 0; \n\ + err: \n\ + bpf_object__destroy_skeleton(s); \n\ + return -ENOMEM; \n\ + } \n\ + \n\ + static inline const void *%2$s__elf_bytes(size_t *sz) \n\ + { \n\ + *sz = %1$d; \n\ + return (const void *)\"\\ \n\ + " + , file_sz, obj_name); /* embed contents of BPF object file */ print_hex(obj_data, file_sz); @@ -954,11 +1020,6 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \"; \n\ - \n\ - return 0; \n\ - err: \n\ - bpf_object__destroy_skeleton(s); \n\ - return -ENOMEM; \n\ } \n\ \n\ #endif /* %s */ \n\ diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 84a9b01d956d..6c0de647b8ad 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -57,7 +57,7 @@ static int do_pin(int argc, char **argv) goto close_obj; } - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (!prog) { p_err("can't find bpf program in objfile %s", objfile); goto close_obj; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 8cc3e36f8cc6..2c258db0d352 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -7,6 +7,7 @@ #include <unistd.h> #include <bpf/bpf.h> +#include <bpf/hashmap.h> #include "json_writer.h" #include "main.h" @@ -20,6 +21,8 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETNS] = "netns", }; +static struct hashmap *link_table; + static int link_parse_fd(int *argc, char ***argv) { int fd; @@ -156,19 +159,18 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) break; } - if (!hash_empty(link_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(link_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(link_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(link_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -244,15 +246,14 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) break; } - if (!hash_empty(link_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(link_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(link_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(link_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); @@ -302,8 +303,15 @@ static int do_show(int argc, char **argv) __u32 id = 0; int err, fd; - if (show_pinned) - build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + if (show_pinned) { + link_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!link_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(link_table, BPF_OBJ_LINK); + } build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { @@ -344,7 +352,10 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); + + if (show_pinned) + delete_pinned_obj_table(link_table); return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 02eaaf065f65..28237d7cef67 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -10,8 +10,9 @@ #include <string.h> #include <bpf/bpf.h> -#include <bpf/libbpf.h> #include <bpf/btf.h> +#include <bpf/hashmap.h> +#include <bpf/libbpf.h> #include "main.h" @@ -31,10 +32,7 @@ bool verifier_logs; bool relaxed_maps; bool use_loader; struct btf *base_btf; -struct pinned_obj_table prog_table; -struct pinned_obj_table map_table; -struct pinned_obj_table link_table; -struct obj_refs_table refs_table; +struct hashmap *refs_table; static void __noreturn clean_and_exit(int i) { @@ -409,10 +407,6 @@ int main(int argc, char **argv) block_mount = false; bin_name = argv[0]; - hash_init(prog_table.table); - hash_init(map_table.table); - hash_init(link_table.table); - opterr = 0; while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", options, NULL)) >= 0) { @@ -479,11 +473,6 @@ int main(int argc, char **argv) if (json_output) jsonw_destroy(&json_wtr); - if (show_pinned) { - delete_pinned_obj_table(&prog_table); - delete_pinned_obj_table(&map_table); - delete_pinned_obj_table(&link_table); - } btf__free(base_btf); return ret; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 90caa42aac4c..383835c2604d 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -11,9 +11,9 @@ #include <linux/bpf.h> #include <linux/compiler.h> #include <linux/kernel.h> -#include <linux/hashtable.h> #include <tools/libc_compat.h> +#include <bpf/hashmap.h> #include <bpf/libbpf.h> #include "json_writer.h" @@ -91,10 +91,7 @@ extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; extern struct btf *base_btf; -extern struct pinned_obj_table prog_table; -extern struct pinned_obj_table map_table; -extern struct pinned_obj_table link_table; -extern struct obj_refs_table refs_table; +extern struct hashmap *refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -108,28 +105,12 @@ void set_max_rlimit(void); int mount_tracefs(const char *target); -struct pinned_obj_table { - DECLARE_HASHTABLE(table, 16); -}; - -struct pinned_obj { - __u32 id; - char *path; - struct hlist_node hash; -}; - -struct obj_refs_table { - DECLARE_HASHTABLE(table, 16); -}; - struct obj_ref { int pid; char comm[16]; }; struct obj_refs { - struct hlist_node node; - __u32 id; int ref_cnt; struct obj_ref *refs; }; @@ -137,15 +118,15 @@ struct obj_refs { struct btf; struct bpf_line_info; -int build_pinned_obj_table(struct pinned_obj_table *table, +int build_pinned_obj_table(struct hashmap *table, enum bpf_obj_type type); -void delete_pinned_obj_table(struct pinned_obj_table *tab); -__weak int build_obj_refs_table(struct obj_refs_table *table, +void delete_pinned_obj_table(struct hashmap *table); +__weak int build_obj_refs_table(struct hashmap **table, enum bpf_obj_type type); -__weak void delete_obj_refs_table(struct obj_refs_table *table); -__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, +__weak void delete_obj_refs_table(struct hashmap *table); +__weak void emit_obj_refs_json(struct hashmap *table, __u32 id, json_writer_t *json_wtr); -__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, +__weak void emit_obj_refs_plain(struct hashmap *table, __u32 id, const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); @@ -259,4 +240,23 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, int print_all_levels(__maybe_unused enum libbpf_print_level level, const char *format, va_list args); + +size_t hash_fn_for_key_as_id(const void *key, void *ctx); +bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); + +static inline void *u32_as_hash_field(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +static inline __u32 hash_field_as_u32(const void *x) +{ + return (__u32)(uintptr_t)x; +} + +static inline bool hashmap__empty(struct hashmap *map) +{ + return map ? hashmap__size(map) == 0 : true; +} + #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 407071d54ab1..cae1f1119296 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -17,6 +17,7 @@ #include <bpf/bpf.h> #include <bpf/btf.h> +#include <bpf/hashmap.h> #include "json_writer.h" #include "main.h" @@ -56,6 +57,8 @@ const char * const map_type_name[] = { const size_t map_type_name_size = ARRAY_SIZE(map_type_name); +static struct hashmap *map_table; + static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@ -535,19 +538,18 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hash_empty(map_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(map_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(map_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(map_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -610,13 +612,12 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) } close(fd); - if (!hash_empty(map_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(map_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(map_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(map_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } printf("\n"); @@ -636,7 +637,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); return 0; @@ -694,8 +695,15 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) - build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + if (show_pinned) { + map_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!map_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(map_table, BPF_OBJ_MAP); + } build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) @@ -740,7 +748,10 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); + + if (show_pinned) + delete_pinned_obj_table(map_table); return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 825f29f93a57..b98ea702d284 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -22,7 +22,6 @@ #include <sys/syscall.h> #include <bpf/bpf.h> -#include <perf-sys.h> #include "main.h" diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 477e55d59c34..56b598eee043 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -6,35 +6,37 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> + #include <bpf/bpf.h> +#include <bpf/hashmap.h> #include "main.h" #include "skeleton/pid_iter.h" #ifdef BPFTOOL_WITHOUT_SKELETONS -int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) { return -ENOTSUP; } -void delete_obj_refs_table(struct obj_refs_table *table) {} -void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} -void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {} +void delete_obj_refs_table(struct hashmap *map) {} +void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) {} +void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) {} #else /* BPFTOOL_WITHOUT_SKELETONS */ #include "pid_iter.skel.h" -static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) +static void add_ref(struct hashmap *map, struct pid_iter_entry *e) { + struct hashmap_entry *entry; struct obj_refs *refs; struct obj_ref *ref; + int err, i; void *tmp; - int i; - hash_for_each_possible(table->table, refs, node, e->id) { - if (refs->id != e->id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) { + refs = entry->value; for (i = 0; i < refs->ref_cnt; i++) { if (refs->refs[i].pid == e->pid) @@ -64,7 +66,6 @@ static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) return; } - refs->id = e->id; refs->refs = malloc(sizeof(*refs->refs)); if (!refs->refs) { free(refs); @@ -76,7 +77,11 @@ static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); refs->ref_cnt = 1; - hash_add(table->table, &refs->node, e->id); + + err = hashmap__append(map, u32_as_hash_field(e->id), refs); + if (err) + p_err("failed to append entry to hashmap for ID %u: %s", + e->id, strerror(errno)); } static int __printf(2, 0) @@ -87,7 +92,7 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, return 0; } -int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) { struct pid_iter_entry *e; char buf[4096 / sizeof(*e) * sizeof(*e)]; @@ -95,7 +100,11 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) int err, ret, fd = -1, i; libbpf_print_fn_t default_print; - hash_init(table->table); + *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); + if (!*map) { + p_err("failed to create hashmap for PID references"); + return -1; + } set_max_rlimit(); skel = pid_iter_bpf__open(); @@ -151,7 +160,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) e = (void *)buf; for (i = 0; i < ret; i++, e++) { - add_ref(table, e); + add_ref(*map, e); } } err = 0; @@ -162,39 +171,44 @@ out: return err; } -void delete_obj_refs_table(struct obj_refs_table *table) +void delete_obj_refs_table(struct hashmap *map) { - struct obj_refs *refs; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; + + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) { + struct obj_refs *refs = entry->value; - hash_for_each_safe(table->table, bkt, tmp, refs, node) { - hash_del(&refs->node); free(refs->refs); free(refs); } + + hashmap__free(map); } -void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, +void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) { - struct obj_refs *refs; - struct obj_ref *ref; - int i; + struct hashmap_entry *entry; - if (hash_empty(table->table)) + if (hashmap__empty(map)) return; - hash_for_each_possible(table->table, refs, node, id) { - if (refs->id != id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { + struct obj_refs *refs = entry->value; + int i; + if (refs->ref_cnt == 0) break; jsonw_name(json_writer, "pids"); jsonw_start_array(json_writer); for (i = 0; i < refs->ref_cnt; i++) { - ref = &refs->refs[i]; + struct obj_ref *ref = &refs->refs[i]; + jsonw_start_object(json_writer); jsonw_int_field(json_writer, "pid", ref->pid); jsonw_string_field(json_writer, "comm", ref->comm); @@ -205,24 +219,24 @@ void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, } } -void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) +void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) { - struct obj_refs *refs; - struct obj_ref *ref; - int i; + struct hashmap_entry *entry; - if (hash_empty(table->table)) + if (hashmap__empty(map)) return; - hash_for_each_possible(table->table, refs, node, id) { - if (refs->id != id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { + struct obj_refs *refs = entry->value; + int i; + if (refs->ref_cnt == 0) break; printf("%s", prefix); for (i = 0; i < refs->ref_cnt; i++) { - ref = &refs->refs[i]; + struct obj_ref *ref = &refs->refs[i]; + printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid); } break; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9c3e343b7d87..515d22952602 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -24,8 +24,8 @@ #include <bpf/bpf.h> #include <bpf/btf.h> +#include <bpf/hashmap.h> #include <bpf/libbpf.h> -#include <bpf/bpf_gen_internal.h> #include <bpf/skel_internal.h> #include "cfg.h" @@ -85,6 +85,8 @@ static const char * const attach_type_strings[] = { [__MAX_BPF_ATTACH_TYPE] = NULL, }; +static struct hashmap *prog_table; + static enum bpf_attach_type parse_attach_type(const char *str) { enum bpf_attach_type type; @@ -308,18 +310,12 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (printed_header) jsonw_end_object(json_wtr); } else { - json_writer_t *btf_wtr = jsonw_new(stdout); + json_writer_t *btf_wtr; struct btf_dumper d = { .btf = btf, - .jw = btf_wtr, .is_plain_text = true, }; - if (!btf_wtr) { - p_err("jsonw alloc failed"); - goto out_free; - } - for (i = 0; i < vlen; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); name = btf__name_by_offset(btf, t_var->name_off); @@ -329,6 +325,14 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!printed_header) { printf("\tmetadata:"); + + btf_wtr = jsonw_new(stdout); + if (!btf_wtr) { + p_err("jsonw alloc failed"); + goto out_free; + } + d.jw = btf_wtr, + printed_header = true; } @@ -415,19 +419,18 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hash_empty(prog_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(prog_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(prog_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(prog_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); show_prog_metadata(fd, info->nr_map_ids); @@ -487,19 +490,18 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); - if (!hash_empty(prog_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(prog_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(prog_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(prog_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); @@ -566,8 +568,15 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) - build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + if (show_pinned) { + prog_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!prog_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(prog_table, BPF_OBJ_PROG); + } build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) @@ -610,7 +619,10 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); + + if (show_pinned) + delete_pinned_obj_table(prog_table); return err; } @@ -1601,7 +1613,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; if (first_prog_only) { - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (!prog) { p_err("object file doesn't contain any bpf program"); goto err_close_obj; diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index bb9fa8de7e62..751643f860b2 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -26,27 +26,33 @@ LIBBPF_SRC := $(srctree)/tools/lib/bpf/ SUBCMD_SRC := $(srctree)/tools/lib/subcmd/ BPFOBJ := $(OUTPUT)/libbpf/libbpf.a +LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/ SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a +LIBBPF_DESTDIR := $(LIBBPF_OUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)include + BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) -$(OUTPUT) $(OUTPUT)/libbpf $(OUTPUT)/libsubcmd: +$(OUTPUT) $(OUTPUT)/libsubcmd $(LIBBPF_OUT): $(call msg,MKDIR,,$@) $(Q)mkdir -p $(@) $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $(abspath $@) install_headers CFLAGS := -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(LIBBPF_SRC) \ + -I$(LIBBPF_INCLUDE) \ -I$(SUBCMD_SRC) LIBS = -lelf -lz @@ -54,7 +60,7 @@ LIBS = -lelf -lz export srctree OUTPUT CFLAGS Q include $(srctree)/tools/build/Makefile.include -$(BINARY_IN): fixdep FORCE | $(OUTPUT) +$(BINARY_IN): $(BPFOBJ) fixdep FORCE | $(OUTPUT) $(Q)$(MAKE) $(build)=resolve_btfids $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) @@ -64,7 +70,8 @@ $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \ $(OUTPUT)/.*.o.d \ - $(OUTPUT)/libbpf \ + $(LIBBPF_OUT) \ + $(LIBBPF_DESTDIR) \ $(OUTPUT)/libsubcmd \ $(OUTPUT)/resolve_btfids) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index de6365b53c9c..a59cb0ee609c 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -60,8 +60,8 @@ #include <linux/rbtree.h> #include <linux/zalloc.h> #include <linux/err.h> -#include <btf.h> -#include <libbpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> #include <parse-options.h> #define BTF_IDS_SECTION ".BTF_ids" @@ -89,6 +89,7 @@ struct btf_id { struct object { const char *path; const char *btf; + const char *base_btf_path; struct { int fd; @@ -477,25 +478,36 @@ static int symbols_resolve(struct object *obj) int nr_structs = obj->nr_structs; int nr_unions = obj->nr_unions; int nr_funcs = obj->nr_funcs; + struct btf *base_btf = NULL; int err, type_id; struct btf *btf; __u32 nr_types; - btf = btf__parse(obj->btf ?: obj->path, NULL); + if (obj->base_btf_path) { + base_btf = btf__parse(obj->base_btf_path, NULL); + err = libbpf_get_error(base_btf); + if (err) { + pr_err("FAILED: load base BTF from %s: %s\n", + obj->base_btf_path, strerror(-err)); + return -1; + } + } + + btf = btf__parse_split(obj->btf ?: obj->path, base_btf); err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s\n", obj->btf ?: obj->path, strerror(-err)); - return -1; + goto out; } err = -1; - nr_types = btf__get_nr_types(btf); + nr_types = btf__type_cnt(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr_types; type_id++) { + for (type_id = 1; type_id < nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -545,6 +557,7 @@ static int symbols_resolve(struct object *obj) err = 0; out: + btf__free(base_btf); btf__free(btf); return err; } @@ -678,7 +691,6 @@ static const char * const resolve_btfids_usage[] = { int main(int argc, const char **argv) { - bool no_fail = false; struct object obj = { .efile = { .idlist_shndx = -1, @@ -695,8 +707,8 @@ int main(int argc, const char **argv) "be more verbose (show errors, etc)"), OPT_STRING(0, "btf", &obj.btf, "BTF data", "BTF data"), - OPT_BOOLEAN(0, "no-fail", &no_fail, - "do not fail if " BTF_IDS_SECTION " section is not found"), + OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", + "path of file providing base BTF"), OPT_END() }; int err = -1; @@ -717,10 +729,8 @@ int main(int argc, const char **argv) */ if (obj.efile.idlist_shndx == -1 || obj.efile.symbols_shndx == -1) { - if (no_fail) - return 0; - pr_err("FAILED to find needed sections\n"); - return -1; + pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); + return 0; } if (symbols_collect(&obj)) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 3818ec511fd2..bbd1150578f7 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -9,9 +9,9 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a -BPF_INCLUDE := $(BPFOBJ_OUTPUT) -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \ - -I$(abspath ../../include/uapi) +BPF_DESTDIR := $(BPFOBJ_OUTPUT) +BPF_INCLUDE := $(BPF_DESTDIR)/include +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) CFLAGS := -g -Wall # Try to detect best kernel BTF source @@ -33,7 +33,7 @@ endif .DELETE_ON_ERROR: -.PHONY: all clean runqslower +.PHONY: all clean runqslower libbpf_hdrs all: runqslower runqslower: $(OUTPUT)/runqslower @@ -46,13 +46,15 @@ clean: $(Q)$(RM) $(OUTPUT)runqslower $(Q)$(RM) -r .output +libbpf_hdrs: $(BPFOBJ) + $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ - $(OUTPUT)/runqslower.bpf.o + $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs -$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ @@ -81,8 +83,10 @@ else endif $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ + DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) +$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - CC=$(HOSTCC) LD=$(HOSTLD) + LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT) \ + LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 3dd2f68366f9..45a9a59828c3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -52,6 +52,7 @@ FEATURE_TESTS_BASIC := \ libslang \ libslang-include-subdir \ libtraceevent \ + libtracefs \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index eff55d287db1..0a3244ad9673 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -36,6 +36,7 @@ FILES= \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ + test-libtracefs.bin \ test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ @@ -90,7 +91,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap $(OUTPUT)test-hello.bin: $(BUILD) @@ -199,6 +200,9 @@ $(OUTPUT)test-libslang-include-subdir.bin: $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent +$(OUTPUT)test-libtracefs.bin: + $(BUILD) -ltracefs + $(OUTPUT)test-libcrypto.bin: $(BUILD) -lcrypto @@ -296,7 +300,7 @@ $(OUTPUT)test-jvmti-cmlr.bin: $(BUILD) $(OUTPUT)test-llvm.bin: - $(BUILDXX) -std=gnu++11 \ + $(BUILDXX) -std=gnu++14 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ $(shell $(LLVM_CONFIG) --libs Core BPF) \ @@ -304,12 +308,12 @@ $(OUTPUT)test-llvm.bin: > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-llvm-version.bin: - $(BUILDXX) -std=gnu++11 \ + $(BUILDXX) -std=gnu++14 \ -I$(shell $(LLVM_CONFIG) --includedir) \ > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-clang.bin: - $(BUILDXX) -std=gnu++11 \ + $(BUILDXX) -std=gnu++14 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ -Wl,--start-group -lclangBasic -lclangDriver \ diff --git a/tools/build/feature/test-libtracefs.c b/tools/build/feature/test-libtracefs.c new file mode 100644 index 000000000000..8eff16c0c10b --- /dev/null +++ b/tools/build/feature/test-libtracefs.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs/tracefs.h> + +int main(void) +{ + struct tracefs_instance *inst = tracefs_instance_create("dummy"); + + tracefs_instance_destroy(inst); + return 0; +} diff --git a/tools/counter/Build b/tools/counter/Build new file mode 100644 index 000000000000..33f4a51d715e --- /dev/null +++ b/tools/counter/Build @@ -0,0 +1 @@ +counter_example-y += counter_example.o diff --git a/tools/counter/Makefile b/tools/counter/Makefile new file mode 100644 index 000000000000..5ebc195fd9c0 --- /dev/null +++ b/tools/counter/Makefile @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../scripts/Makefile.include + +bindir ?= /usr/bin + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include + +ALL_TARGETS := counter_example +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) + +all: $(ALL_PROGRAMS) + +export srctree OUTPUT CC LD CFLAGS +include $(srctree)/tools/build/Makefile.include + +# +# We need the following to be outside of kernel tree +# +$(OUTPUT)include/linux/counter.h: ../../include/uapi/linux/counter.h + mkdir -p $(OUTPUT)include/linux 2>&1 || true + ln -sf $(CURDIR)/../../include/uapi/linux/counter.h $@ + +prepare: $(OUTPUT)include/linux/counter.h + +COUNTER_EXAMPLE := $(OUTPUT)counter_example.o +$(COUNTER_EXAMPLE): prepare FORCE + $(Q)$(MAKE) $(build)=counter_example +$(OUTPUT)counter_example: $(COUNTER_EXAMPLE) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + +clean: + rm -f $(ALL_PROGRAMS) + rm -rf $(OUTPUT)include/linux/counter.h + find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + +install: $(ALL_PROGRAMS) + install -d -m 755 $(DESTDIR)$(bindir); \ + for program in $(ALL_PROGRAMS); do \ + install $$program $(DESTDIR)$(bindir); \ + done + +FORCE: + +.PHONY: all install clean FORCE prepare diff --git a/tools/counter/counter_example.c b/tools/counter/counter_example.c new file mode 100644 index 000000000000..be55287b950f --- /dev/null +++ b/tools/counter/counter_example.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Counter - example userspace application + * + * The userspace application opens /dev/counter0, configures the + * COUNTER_EVENT_INDEX event channel 0 to gather Count 0 count and Count + * 1 count, and prints out the data as it becomes available on the + * character device node. + * + * Copyright (C) 2021 William Breathitt Gray + */ +#include <errno.h> +#include <fcntl.h> +#include <linux/counter.h> +#include <stdio.h> +#include <string.h> +#include <sys/ioctl.h> +#include <unistd.h> + +static struct counter_watch watches[2] = { + { + /* Component data: Count 0 count */ + .component.type = COUNTER_COMPONENT_COUNT, + .component.scope = COUNTER_SCOPE_COUNT, + .component.parent = 0, + /* Event type: Index */ + .event = COUNTER_EVENT_INDEX, + /* Device event channel 0 */ + .channel = 0, + }, + { + /* Component data: Count 1 count */ + .component.type = COUNTER_COMPONENT_COUNT, + .component.scope = COUNTER_SCOPE_COUNT, + .component.parent = 1, + /* Event type: Index */ + .event = COUNTER_EVENT_INDEX, + /* Device event channel 0 */ + .channel = 0, + }, +}; + +int main(void) +{ + int fd; + int ret; + int i; + struct counter_event event_data[2]; + + fd = open("/dev/counter0", O_RDWR); + if (fd == -1) { + perror("Unable to open /dev/counter0"); + return 1; + } + + for (i = 0; i < 2; i++) { + ret = ioctl(fd, COUNTER_ADD_WATCH_IOCTL, watches + i); + if (ret == -1) { + fprintf(stderr, "Error adding watches[%d]: %s\n", i, + strerror(errno)); + return 1; + } + } + ret = ioctl(fd, COUNTER_ENABLE_EVENTS_IOCTL); + if (ret == -1) { + perror("Error enabling events"); + return 1; + } + + for (;;) { + ret = read(fd, event_data, sizeof(event_data)); + if (ret == -1) { + perror("Failed to read event data"); + return 1; + } + + if (ret != sizeof(event_data)) { + fprintf(stderr, "Failed to read event data\n"); + return -EIO; + } + + printf("Timestamp 0: %llu\tCount 0: %llu\n" + "Error Message 0: %s\n" + "Timestamp 1: %llu\tCount 1: %llu\n" + "Error Message 1: %s\n", + event_data[0].timestamp, event_data[0].value, + strerror(event_data[0].status), + event_data[1].timestamp, event_data[1].value, + strerror(event_data[1].status)); + } + + return 0; +} diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h new file mode 100644 index 000000000000..47387c607035 --- /dev/null +++ b/tools/include/asm-generic/unaligned.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copied from the kernel sources to tools/perf/: + */ + +#ifndef __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H +#define __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) +#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) + +#endif /* __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H */ + diff --git a/tools/include/linux/list_sort.h b/tools/include/linux/list_sort.h new file mode 100644 index 000000000000..453105f74e05 --- /dev/null +++ b/tools/include/linux/list_sort.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include <linux/types.h> + +struct list_head; + +typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, + const struct list_head *, const struct list_head *); + +__attribute__((nonnull(2,3))) +void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp); +#endif diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 7e72d975cb76..aca52db2f3f3 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -66,6 +66,17 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but otherwise needs objtool/ORC coverage when frame pointers are disabled. + */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + #else /* __ASSEMBLY__ */ /* @@ -127,6 +138,7 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 791f31dd0abe..ba5af15e25f5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1629,7 +1637,7 @@ union bpf_attr { * u32 bpf_get_smp_processor_id(void) * Description * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the + * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. * Return @@ -4046,7 +4054,7 @@ union bpf_attr { * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes. + * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -4751,7 +4759,8 @@ union bpf_attr { * Each format specifier in **fmt** corresponds to one u64 element * in the **data** array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* - * array. The *data_len* is the size of *data* in bytes. + * array. The *data_len* is the size of *data* in bytes - must be + * a multiple of 8. * * Formats **%s** and **%p{i,I}{4,6}** require to read kernel * memory. Reading kernel memory may fail due to either invalid @@ -4877,6 +4886,58 @@ union bpf_attr { * Get the struct pt_regs associated with **task**. * Return * A pointer to struct pt_regs. + * + * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) + * Description + * Get branch trace from hardware engines like Intel LBR. The + * hardware engine is stopped shortly after the helper is + * called. Therefore, the user need to filter branch entries + * based on the actual use case. To capture branch trace + * before the trigger point of the BPF program, the helper + * should be called at the beginning of the BPF program. + * + * The data is stored as struct perf_branch_entry into output + * buffer *entries*. *size* is the size of *entries* in bytes. + * *flags* is reserved for now and must be zero. + * + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-ENOENT** if architecture does not support branch records. + * + * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 + * to format and can handle more format args as a result. + * + * Arguments are to be used as in **bpf_seq_printf**\ () helper. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5055,6 +5116,10 @@ union bpf_attr { FN(get_func_ip), \ FN(get_attach_cookie), \ FN(task_pt_regs), \ + FN(get_branch_snapshot), \ + FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5284,6 +5349,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; + __u32 :32; /* Padding, future use. */ + __u64 hwtstamp; }; struct bpf_tunnel_key { @@ -5577,6 +5644,7 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; + __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { @@ -5594,6 +5662,8 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 :32; /* alignment pad */ + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index d27b1708efe9..deb12f755f0f 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and VAR. + * FUNC, FUNC_PROTO, VAR and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -56,25 +56,29 @@ struct btf_type { #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) -#define BTF_KIND_UNKN 0 /* Unknown */ -#define BTF_KIND_INT 1 /* Integer */ -#define BTF_KIND_PTR 2 /* Pointer */ -#define BTF_KIND_ARRAY 3 /* Array */ -#define BTF_KIND_STRUCT 4 /* Struct */ -#define BTF_KIND_UNION 5 /* Union */ -#define BTF_KIND_ENUM 6 /* Enumeration */ -#define BTF_KIND_FWD 7 /* Forward */ -#define BTF_KIND_TYPEDEF 8 /* Typedef */ -#define BTF_KIND_VOLATILE 9 /* Volatile */ -#define BTF_KIND_CONST 10 /* Const */ -#define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_FUNC 12 /* Function */ -#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ -#define BTF_KIND_VAR 14 /* Variable */ -#define BTF_KIND_DATASEC 15 /* Section */ -#define BTF_KIND_FLOAT 16 /* Floating point */ -#define BTF_KIND_MAX BTF_KIND_FLOAT -#define NR_BTF_KINDS (BTF_KIND_MAX + 1) +enum { + BTF_KIND_UNKN = 0, /* Unknown */ + BTF_KIND_INT = 1, /* Integer */ + BTF_KIND_PTR = 2, /* Pointer */ + BTF_KIND_ARRAY = 3, /* Array */ + BTF_KIND_STRUCT = 4, /* Struct */ + BTF_KIND_UNION = 5, /* Union */ + BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_FWD = 7, /* Forward */ + BTF_KIND_TYPEDEF = 8, /* Typedef */ + BTF_KIND_VOLATILE = 9, /* Volatile */ + BTF_KIND_CONST = 10, /* Const */ + BTF_KIND_RESTRICT = 11, /* Restrict */ + BTF_KIND_FUNC = 12, /* Function */ + BTF_KIND_FUNC_PROTO = 13, /* Function Proto */ + BTF_KIND_VAR = 14, /* Variable */ + BTF_KIND_DATASEC = 15, /* Section */ + BTF_KIND_FLOAT = 16, /* Floating point */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + + NR_BTF_KINDS, + BTF_KIND_MAX = NR_BTF_KINDS - 1, +}; /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -170,4 +174,15 @@ struct btf_var_secinfo { __u32 size; }; +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe + * additional information related to the tag applied location. + * If component_idx == -1, the tag is applied to a struct, union, + * variable or function. Otherwise, it is applied to a struct/union + * member or a func argument, and component_idx indicates which member + * or argument (0 ... vlen-1). + */ +struct btf_decl_tag { + __s32 component_idx; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index f92880a15645..bd8860eeb291 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1141,6 +1141,21 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, + /* + * Data written to the AUX area by hardware due to aux_output, may need + * to be matched to the event by an architecture-specific hardware ID. + * This records the hardware ID, but requires sample_id to provide the + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT + * records from multiple events. + * + * struct { + * struct perf_event_header header; + * u64 hw_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -1210,14 +1225,16 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_rsvd:21; + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:21, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1241,7 +1258,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. + * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ @@ -1307,6 +1330,11 @@ union perf_mem_data_src { #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +/* 2-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 43bd7f713c39..de45fcd2dcbe 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -235,7 +235,7 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_NONE 0 +# define PR_MTE_TCF_NONE 0UL # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) # define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 1d84ec9db93b..5859ca0a1439 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -784,6 +784,7 @@ struct snd_rawmidi_status { #define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) #define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) +#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int) #define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params) #define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status) #define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index b0bf56c5f120..5a5bd74f55bd 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -742,7 +742,7 @@ class DebugfsProvider(Provider): The fields are all available KVM debugfs files """ - exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns'] + exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns'] fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2] if field not in exempt_list] diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 5d4cfac671d5..0da84cb9e66d 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -libbpf_version.h libbpf.pc libbpf.so.* TAGS diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 74c3b73a5fbe..b393b5e82380 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -8,7 +8,8 @@ VERSION_SCRIPT := libbpf.map LIBBPF_VERSION := $(shell \ grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \ sort -rV | head -n1 | cut -d'_' -f2) -LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) +LIBBPF_MAJOR_VERSION := $(word 1,$(subst ., ,$(LIBBPF_VERSION))) +LIBBPF_MINOR_VERSION := $(word 2,$(subst ., ,$(LIBBPF_VERSION))) MAKEFLAGS += --no-print-directory @@ -59,7 +60,8 @@ ifndef VERBOSE VERBOSE = 0 endif -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi +INCLUDES = -I$(if $(OUTPUT),$(OUTPUT),.) \ + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi export prefix libdir src obj @@ -112,6 +114,7 @@ STATIC_OBJDIR := $(OUTPUT)staticobjs/ BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h +BPF_GENERATED := $(BPF_HELPER_DEFS) LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) @@ -136,25 +139,19 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force $(BPF_GENERATED) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true - @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ - (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ - echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true - @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ - (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ - echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force $(BPF_GENERATED) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -179,7 +176,7 @@ $(OUTPUT)libbpf.pc: -e "s|@VERSION@|$(LIBBPF_VERSION)|" \ < libbpf.pc.template > $@ -check: check_abi +check: check_abi check_version check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ @@ -205,6 +202,21 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) exit 1; \ fi +HDR_MAJ_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) +HDR_MIN_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) + +check_version: $(VERSION_SCRIPT) libbpf_version.h + @if [ "$(HDR_MAJ_VERSION)" != "$(LIBBPF_MAJOR_VERSION)" ]; then \ + echo "Error: libbpf major version mismatch detected: " \ + "'$(HDR_MAJ_VERSION)' != '$(LIBBPF_MAJOR_VERSION)'" >&2; \ + exit 1; \ + fi + @if [ "$(HDR_MIN_VERSION)" != "$(LIBBPF_MINOR_VERSION)" ]; then \ + echo "Error: libbpf minor version mismatch detected: " \ + "'$(HDR_MIN_VERSION)' != '$(LIBBPF_MINOR_VERSION)'" >&2; \ + exit 1; \ + fi + define do_install_mkdir if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ @@ -223,14 +235,24 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ - bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h \ - bpf_endian.h bpf_core_read.h skel_internal.h +SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ + bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ + skel_internal.h libbpf_version.h +GEN_HDRS := $(BPF_GENERATED) + +INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf +INSTALL_SRC_HDRS := $(addprefix $(INSTALL_PFX)/, $(SRC_HDRS)) +INSTALL_GEN_HDRS := $(addprefix $(INSTALL_PFX)/, $(notdir $(GEN_HDRS))) + +$(INSTALL_SRC_HDRS): $(INSTALL_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(prefix)/include/bpf,644) + +$(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(prefix)/include/bpf,644) -install_headers: $(BPF_HELPER_DEFS) - $(call QUIET_INSTALL, headers) \ - $(foreach hdr,$(INSTALL_HEADERS), \ - $(call do_install,$(hdr),$(prefix)/include/bpf,644);) +install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ @@ -240,12 +262,12 @@ install: install_lib install_pkgconfig install_headers clean: $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ - *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ + *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_GENERATED) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ $(addprefix $(OUTPUT), \ *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) -PHONY += force cscope tags +PHONY += force cscope tags check check_abi check_version force: cscope: diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 2401fad090c5..c09cbb868c9f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -65,19 +65,28 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + int fd; + + fd = sys_bpf(cmd, attr, size); + return ensure_good_fd(fd); +} + static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) { int retries = 5; int fd; do { - fd = sys_bpf(BPF_PROG_LOAD, attr, size); + fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); } while (fd < 0 && errno == EAGAIN && retries-- > 0); return fd; } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) { union bpf_attr attr; int fd; @@ -102,11 +111,36 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) create_attr->btf_vmlinux_value_type_id; else attr.inner_map_fd = create_attr->inner_map_fd; + attr.map_extra = create_attr->map_extra; - fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +{ + struct bpf_create_map_params p = {}; + + p.map_type = create_attr->map_type; + p.key_size = create_attr->key_size; + p.value_size = create_attr->value_size; + p.max_entries = create_attr->max_entries; + p.map_flags = create_attr->map_flags; + p.name = create_attr->name; + p.numa_node = create_attr->numa_node; + p.btf_fd = create_attr->btf_fd; + p.btf_key_type_id = create_attr->btf_key_type_id; + p.btf_value_type_id = create_attr->btf_value_type_id; + p.map_ifindex = create_attr->map_ifindex; + if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) + p.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + p.inner_map_fd = create_attr->inner_map_fd; + + return libbpf__bpf_create_map_xattr(&p); +} + int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) @@ -181,7 +215,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.numa_node = node; } - fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -264,6 +298,7 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); + attr.fd_array = ptr_to_u64(load_attr->fd_array); if (load_attr->name) memcpy(attr.prog_name, load_attr->name, @@ -608,7 +643,7 @@ int bpf_obj_get(const char *pathname) memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); - fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -719,7 +754,7 @@ int bpf_link_create(int prog_fd, int target_fd, break; } proceed: - fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -762,7 +797,7 @@ int bpf_iter_create(int link_fd) memset(&attr, 0, sizeof(attr)); attr.iter_create.link_fd = link_fd; - fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -920,7 +955,7 @@ int bpf_prog_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.prog_id = id; - fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -932,7 +967,7 @@ int bpf_map_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.map_id = id; - fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -944,7 +979,7 @@ int bpf_btf_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.btf_id = id; - fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -956,7 +991,7 @@ int bpf_link_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.link_id = id; - fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -987,7 +1022,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1007,7 +1042,7 @@ retry: attr.btf_log_buf = ptr_to_u64(log_buf); } - fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; @@ -1049,7 +1084,7 @@ int bpf_enable_stats(enum bpf_stats_type type) memset(&attr, 0, sizeof(attr)); attr.enable_stats.type = type; - fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 09ebe3db5f2f..e4aa9996a550 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -40,7 +40,7 @@ enum bpf_enum_value_kind { #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ (void *)dst, \ diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 615400391e57..d26e5472fe50 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -7,6 +7,21 @@ struct ksym_relo_desc { const char *name; int kind; int insn_idx; + bool is_weak; + bool is_typeless; +}; + +struct ksym_desc { + const char *name; + int ref; + int kind; + union { + /* used for kfunc */ + int off; + /* used for typeless ksym */ + bool typeless; + }; + int insn; }; struct bpf_gen { @@ -24,18 +39,23 @@ struct bpf_gen { int relo_cnt; char attach_target[128]; int attach_kind; + struct ksym_desc *ksyms; + __u32 nr_ksyms; + int fd_array; + int nr_fd_array; }; void bpf_gen__init(struct bpf_gen *gen, int log_level); int bpf_gen__finish(struct bpf_gen *gen); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); struct bpf_prog_load_params; void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx); #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index b9987c3efa3c..963b1060d944 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -14,14 +14,6 @@ #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] -/* Helper macro to print out debug messages */ -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names @@ -224,4 +216,47 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +#ifdef BPF_NO_GLOBAL_DATA +#define BPF_PRINTK_FMT_MOD +#else +#define BPF_PRINTK_FMT_MOD static const +#endif + +#define __bpf_printk(fmt, ...) \ +({ \ + BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* + * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments + * instead of an array of u64. + */ +#define __bpf_vprintk(fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_trace_vprintk(___fmt, sizeof(___fmt), \ + ___param, sizeof(___param)); \ +}) + +/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args + * Otherwise use __bpf_vprintk + */ +#define ___bpf_pick_printk(...) \ + ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ + __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ + __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ + __bpf_printk /*1*/, __bpf_printk /*0*/) + +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args) + #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index d6bfbe009296..db05a5937105 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -24,6 +24,9 @@ #elif defined(__TARGET_ARCH_sparc) #define bpf_target_sparc #define bpf_target_defined +#elif defined(__TARGET_ARCH_riscv) + #define bpf_target_riscv + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -48,6 +51,9 @@ #elif defined(__sparc__) #define bpf_target_sparc #define bpf_target_defined +#elif defined(__riscv) && __riscv_xlen == 64 + #define bpf_target_riscv + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -288,6 +294,32 @@ struct pt_regs; #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif +#elif defined(bpf_target_riscv) + +struct pt_regs; +#define PT_REGS_RV const volatile struct user_regs_struct +#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) +#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) +#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) +#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) +#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) +#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) +#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) +#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) +#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) + #endif #if defined(bpf_target_powerpc) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 77dc24d58302..7e4c5586bd87 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -57,7 +57,7 @@ struct btf { * representation is broken up into three independently allocated * memory regions to be able to modify them independently. * raw_data is nulled out at that point, but can be later allocated - * and cached again if user calls btf__get_raw_data(), at which point + * and cached again if user calls btf__raw_data(), at which point * raw_data will contain a contiguous copy of header, types, and * strings: * @@ -189,12 +189,17 @@ int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_ return 0; } +static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt) +{ + return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), + btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); +} + static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { __u32 *p; - p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types, BTF_MAX_NR_TYPES, 1); + p = btf_add_type_offs_mem(btf, 1); if (!p) return -ENOMEM; @@ -231,17 +236,23 @@ static int btf_parse_hdr(struct btf *btf) } btf_bswap_hdr(hdr); } else if (hdr->magic != BTF_MAGIC) { - pr_debug("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic: %x\n", hdr->magic); + return -EINVAL; + } + + if (btf->raw_size < hdr->hdr_len) { + pr_debug("BTF header len %u larger than data size %u\n", + hdr->hdr_len, btf->raw_size); return -EINVAL; } - meta_left = btf->raw_size - sizeof(*hdr); - if (meta_left < hdr->str_off + hdr->str_len) { - pr_debug("Invalid BTF total size:%u\n", btf->raw_size); + meta_left = btf->raw_size - hdr->hdr_len; + if (meta_left < (long long)hdr->str_off + hdr->str_len) { + pr_debug("Invalid BTF total size: %u\n", btf->raw_size); return -EINVAL; } - if (hdr->type_off + hdr->type_len > hdr->str_off) { + if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; @@ -304,6 +315,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); + case BTF_KIND_DECL_TAG: + return base_size + sizeof(struct btf_decl_tag); default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -376,6 +389,9 @@ static int btf_bswap_type_rest(struct btf_type *t) v->size = bswap_32(v->size); } return 0; + case BTF_KIND_DECL_TAG: + btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx); + return 0; default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -425,6 +441,11 @@ __u32 btf__get_nr_types(const struct btf *btf) return btf->start_id + btf->nr_types - 1; } +__u32 btf__type_cnt(const struct btf *btf) +{ + return btf->start_id + btf->nr_types; +} + const struct btf *btf__base_btf(const struct btf *btf) { return btf->base_btf; @@ -456,8 +477,8 @@ static int determine_ptr_size(const struct btf *btf) if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_int(t)) continue; @@ -517,9 +538,9 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) static bool is_host_big_endian(void) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return false; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return true; #else # error "Unrecognized __BYTE_ORDER__" @@ -586,6 +607,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -673,12 +695,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) __s32 btf__find_by_name(const struct btf *btf, const char *type_name) { - __u32 i, nr_types = btf__get_nr_types(btf); + __u32 i, nr_types = btf__type_cnt(btf); if (!strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = 1; i < nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name = btf__name_by_offset(btf, t->name_off); @@ -689,15 +711,15 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return libbpf_err(-ENOENT); } -__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, - __u32 kind) +static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, + const char *type_name, __u32 kind) { - __u32 i, nr_types = btf__get_nr_types(btf); + __u32 i, nr_types = btf__type_cnt(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = start_id; i < nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; @@ -711,6 +733,18 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return libbpf_err(-ENOENT); } +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, btf->start_id, type_name, kind); +} + +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, 1, type_name, kind); +} + static bool btf_is_modifiable(const struct btf *btf) { return (void *)btf->hdr != btf->raw_data; @@ -758,7 +792,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_id = btf__type_cnt(base_btf); btf->start_str_off = base_btf->hdr->str_len; } @@ -808,7 +842,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_id = btf__type_cnt(base_btf); btf->start_str_off = base_btf->hdr->str_len; } @@ -863,7 +897,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("failed to open %s: %s\n", path, strerror(errno)); @@ -1084,99 +1118,6 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse(path, base_btf, NULL)); } -static int compare_vsi_off(const void *_a, const void *_b) -{ - const struct btf_var_secinfo *a = _a; - const struct btf_var_secinfo *b = _b; - - return a->offset - b->offset; -} - -static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, - struct btf_type *t) -{ - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; - struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; - - if (!name) { - pr_debug("No name found in string section for DATASEC kind.\n"); - return -ENOENT; - } - - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables - */ - if (t->size) - goto sort_vars; - - ret = bpf_object__section_size(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } - - t->size = size; - - for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { - t_var = btf__type_by_id(btf, vsi->type); - var = btf_var(t_var); - - if (!btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); - return -EINVAL; - } - - if (var->linkage == BTF_VAR_STATIC) - continue; - - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); - return -ENOENT; - } - - ret = bpf_object__variable_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); - return -ENOENT; - } - - vsi->offset = off; - } - -sort_vars: - qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); - return 0; -} - -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - int err = 0; - __u32 i; - - for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = btf_type_by_id(btf, i); - - /* Loader needs to fix up some of the things compiler - * couldn't get its hands on while emitting BTF. This - * is section size and global variable offset. We use - * the info from the ELF itself for this purpose. - */ - if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); - if (err) - break; - } - } - - return libbpf_err(err); -} - static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); int btf__load_into_kernel(struct btf *btf) @@ -1294,7 +1235,7 @@ err_out: return NULL; } -const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) +const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) { struct btf *btf = (struct btf *)btf_ro; __u32 data_sz; @@ -1302,7 +1243,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); if (!data) - return errno = -ENOMEM, NULL; + return errno = ENOMEM, NULL; btf->raw_size = data_sz; if (btf->swapped_endian) @@ -1313,6 +1254,9 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) return data; } +__attribute__((alias("btf__raw_data"))) +const void *btf__get_raw_data(const struct btf *btf, __u32 *size); + const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->start_str_off) @@ -1685,6 +1629,111 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return btf_commit_type(btf, sz); } +static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +{ + struct btf *btf = ctx; + + if (!*type_id) /* nothing to do for VOID references */ + return 0; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + return 0; +} + +int btf__add_btf(struct btf *btf, const struct btf *src_btf) +{ + struct btf_pipe p = { .src = src_btf, .dst = btf }; + int data_sz, sz, cnt, i, err, old_strs_len; + __u32 *off; + void *t; + + /* appending split BTF isn't supported yet */ + if (src_btf->base_btf) + return libbpf_err(-ENOTSUP); + + /* deconstruct BTF, if necessary, and invalidate raw_data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + /* remember original strings section size if we have to roll back + * partial strings section changes + */ + old_strs_len = btf->hdr->str_len; + + data_sz = src_btf->hdr->type_len; + cnt = btf__type_cnt(src_btf) - 1; + + /* pre-allocate enough memory for new types */ + t = btf_add_type_mem(btf, data_sz); + if (!t) + return libbpf_err(-ENOMEM); + + /* pre-allocate enough memory for type offset index for new types */ + off = btf_add_type_offs_mem(btf, cnt); + if (!off) + return libbpf_err(-ENOMEM); + + /* bulk copy types data for all types from src_btf */ + memcpy(t, src_btf->types_data, data_sz); + + for (i = 0; i < cnt; i++) { + sz = btf_type_size(t); + if (sz < 0) { + /* unlikely, has to be corrupted src_btf */ + err = sz; + goto err_out; + } + + /* fill out type ID to type offset mapping for lookups by type ID */ + *off = t - btf->types_data; + + /* add, dedup, and remap strings referenced by this BTF type */ + err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + if (err) + goto err_out; + + /* remap all type IDs referenced from this BTF type */ + err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + if (err) + goto err_out; + + /* go to next type data and type offset index entry */ + t += sz; + off++; + } + + /* Up until now any of the copied type data was effectively invisible, + * so if we exited early before this point due to error, BTF would be + * effectively unmodified. There would be extra internal memory + * pre-allocated, but it would not be available for querying. But now + * that we've copied and rewritten all the data successfully, we can + * update type count and various internal offsets and sizes to + * "commit" the changes and made them visible to the outside world. + */ + btf->hdr->type_len += data_sz; + btf->hdr->str_off += data_sz; + btf->nr_types += cnt; + + /* return type ID of the first added BTF type */ + return btf->start_id + btf->nr_types - cnt; +err_out: + /* zero out preallocated memory as if it was just allocated with + * libbpf_add_mem() + */ + memset(btf->types_data + btf->hdr->type_len, 0, data_sz); + memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); + + /* and now restore original strings section size; types data size + * wasn't modified, so doesn't need restoring, see big comment above */ + btf->hdr->str_len = old_strs_len; + + return libbpf_err(err); +} + /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; @@ -1933,7 +1982,7 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) static struct btf_type *btf_last_type(struct btf *btf) { - return btf_type_by_id(btf, btf__get_nr_types(btf)); + return btf_type_by_id(btf, btf__type_cnt(btf) - 1); } /* @@ -2440,6 +2489,48 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return 0; } +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + struct btf_type *t; + int sz, value_off; + + if (!value || !value[0] || component_idx < -1) + return libbpf_err(-EINVAL); + + if (validate_type_id(ref_type_id)) + return libbpf_err(-EINVAL); + + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); + t = btf_add_type_mem(btf, sz); + if (!t) + return libbpf_err(-ENOMEM); + + value_off = btf__add_str(btf, value); + if (value_off < 0) + return value_off; + + t->name_off = value_off; + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); + t->type = ref_type_id; + btf_decl_tag(t)->component_idx = component_idx; + + return btf_commit_type(btf, sz); +} + struct btf_ext_sec_setup_param { __u32 off; __u32 len; @@ -2914,8 +3005,10 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, return libbpf_err(-EINVAL); } - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + if (btf_ensure_modifiable(btf)) { + err = -ENOMEM; + goto done; + } err = btf_dedup_prep(d); if (err) { @@ -3095,7 +3188,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, goto done; } - type_cnt = btf__get_nr_types(btf) + 1; + type_cnt = btf__type_cnt(btf); d->map = malloc(sizeof(__u32) * type_cnt); if (!d->map) { err = -ENOMEM; @@ -3256,8 +3349,8 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } -/* Calculate type signature hash of INT. */ -static long btf_hash_int(struct btf_type *t) +/* Calculate type signature hash of INT or TAG. */ +static long btf_hash_int_decl_tag(struct btf_type *t) { __u32 info = *(__u32 *)(t + 1); long h; @@ -3267,8 +3360,8 @@ static long btf_hash_int(struct btf_type *t) return h; } -/* Check structural equality of two INTs. */ -static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +/* Check structural equality of two INTs or TAGs. */ +static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) { __u32 info1, info2; @@ -3535,7 +3628,8 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_common(t); break; case BTF_KIND_INT: - h = btf_hash_int(t); + case BTF_KIND_DECL_TAG: + h = btf_hash_int_decl_tag(t); break; case BTF_KIND_ENUM: h = btf_hash_enum(t); @@ -3590,14 +3684,15 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FUNC_PROTO: case BTF_KIND_VAR: case BTF_KIND_DATASEC: + case BTF_KIND_DECL_TAG: return 0; case BTF_KIND_INT: - h = btf_hash_int(t); + h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); - if (btf_equal_int(t, cand)) { + if (btf_equal_int_tag(t, cand)) { new_id = cand_id; break; } @@ -3881,7 +3976,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, switch (cand_kind) { case BTF_KIND_INT: - return btf_equal_int(cand_type, canon_type); + return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: if (d->opts.dont_resolve_fwds) @@ -4210,6 +4305,23 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) } break; + case BTF_KIND_DECL_TAG: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_int_decl_tag(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = btf_type_by_id(d->btf, cand_id); + if (btf_equal_int_tag(t, cand)) { + new_id = cand_id; + break; + } + } + break; + case BTF_KIND_ARRAY: { struct btf_array *info = btf_array(t); @@ -4482,6 +4594,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4a711f990904..bc005ba3ceec 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (c) 2018 Facebook */ +/*! \file */ #ifndef __LIBBPF_BTF_H #define __LIBBPF_BTF_H @@ -30,11 +31,80 @@ enum btf_endianness { BTF_BIG_ENDIAN = 1, }; +/** + * @brief **btf__free()** frees all data of a BTF object + * @param btf BTF object to free + */ LIBBPF_API void btf__free(struct btf *btf); +/** + * @brief **btf__new()** creates a new instance of a BTF object from the raw + * bytes of an ELF's BTF section + * @param data raw bytes + * @param size number of bytes passed in `data` + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new(const void *data, __u32 size); + +/** + * @brief **btf__new_split()** create a new instance of a BTF object from the + * provided raw data bytes. It takes another BTF instance, **base_btf**, which + * serves as a base BTF, which is extended by types in a newly created BTF + * instance + * @param data raw bytes + * @param size length of raw bytes + * @param base_btf the base BTF object + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * If *base_btf* is NULL, `btf__new_split()` is equivalent to `btf__new()` and + * creates non-split BTF. + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf); + +/** + * @brief **btf__new_empty()** creates an empty BTF object. Use + * `btf__add_*()` to populate such BTF object. + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new_empty(void); + +/** + * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an + * ELF BTF section except with a base BTF on top of which split BTF should be + * based + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * If *base_btf* is NULL, `btf__new_empty_split()` is equivalent to + * `btf__new_empty()` and creates non-split BTF. + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. In both cases thread-local `errno` variable is + * always set to error code as well. + */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); @@ -50,16 +120,21 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only") LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1") LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); +LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); @@ -72,7 +147,9 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, @@ -101,6 +178,28 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type); +/** + * @brief **btf__add_btf()** appends all the BTF types from *src_btf* into *btf* + * @param btf BTF object which all the BTF types and strings are added to + * @param src_btf BTF object which all BTF types and referenced strings are copied from + * @return BTF type ID of the first appended BTF type, or negative error code + * + * **btf__add_btf()** can be used to simply and efficiently append the entire + * contents of one BTF object to another one. All the BTF type data is copied + * over, all referenced type IDs are adjusted by adding a necessary ID offset. + * Only strings referenced from BTF types are copied over and deduplicated, so + * if there were some unused strings in *src_btf*, those won't be copied over, + * which is consistent with the general string deduplication semantics of BTF + * writing APIs. + * + * If any error is encountered during this process, the contents of *btf* is + * left intact, which means that **btf__add_btf()** follows the transactional + * semantics and the operation as a whole is all-or-nothing. + * + * *src_btf* has to be non-split BTF, as of now copying types from split BTF + * is not supported and will result in -ENOTSUP error code returned. + */ +LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf); LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz); @@ -141,6 +240,10 @@ LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz); +/* tag construction API */ +LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; @@ -328,6 +431,11 @@ static inline bool btf_is_float(const struct btf_type *t) return btf_kind(t) == BTF_KIND_FLOAT; } +static inline bool btf_is_decl_tag(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_DECL_TAG; +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); @@ -396,6 +504,12 @@ btf_var_secinfos(const struct btf_type *t) return (struct btf_var_secinfo *)(t + 1); } +struct btf_decl_tag; +static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) +{ + return (struct btf_decl_tag *)(t + 1); +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e4b483f15fb9..17db62b5002e 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -188,7 +188,7 @@ err: static int btf_dump_resize(struct btf_dump *d) { - int err, last_id = btf__get_nr_types(d->btf); + int err, last_id = btf__type_cnt(d->btf) - 1; if (last_id <= d->last_id) return 0; @@ -262,7 +262,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) { int err, i; - if (id > btf__get_nr_types(d->btf)) + if (id >= btf__type_cnt(d->btf)) return libbpf_err(-EINVAL); err = btf_dump_resize(d); @@ -294,11 +294,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) */ static int btf_dump_mark_referenced(struct btf_dump *d) { - int i, j, n = btf__get_nr_types(d->btf); + int i, j, n = btf__type_cnt(d->btf); const struct btf_type *t; __u16 vlen; - for (i = d->last_id + 1; i <= n; i++) { + for (i = d->last_id + 1; i < n; i++) { t = btf__type_by_id(d->btf, i); vlen = btf_vlen(t); @@ -316,6 +316,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: d->type_states[t->type].referenced = 1; break; @@ -583,6 +584,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DATASEC: + case BTF_KIND_DECL_TAG: d->type_states[id].order_state = ORDERED; return 0; @@ -1560,29 +1562,28 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, __u64 *value) { __u16 left_shift_bits, right_shift_bits; - __u8 nr_copy_bits, nr_copy_bytes; const __u8 *bytes = data; - int sz = t->size; + __u8 nr_copy_bits; __u64 num = 0; int i; /* Maximum supported bitfield size is 64 bits */ - if (sz > 8) { - pr_warn("unexpected bitfield size %d\n", sz); + if (t->size > 8) { + pr_warn("unexpected bitfield size %d\n", t->size); return -EINVAL; } /* Bitfield value retrieval is done in two steps; first relevant bytes are * stored in num, then we left/right shift num to eliminate irrelevant bits. */ - nr_copy_bits = bit_sz + bits_offset; - nr_copy_bytes = t->size; -#if __BYTE_ORDER == __LITTLE_ENDIAN - for (i = nr_copy_bytes - 1; i >= 0; i--) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + for (i = t->size - 1; i >= 0; i--) num = num * 256 + bytes[i]; -#elif __BYTE_ORDER == __BIG_ENDIAN - for (i = 0; i < nr_copy_bytes; i++) + nr_copy_bits = bit_sz + bits_offset; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + for (i = 0; i < t->size; i++) num = num * 256 + bytes[i]; + nr_copy_bits = t->size * 8 - bits_offset; #else # error "Unrecognized __BYTE_ORDER__" #endif @@ -1656,9 +1657,15 @@ static int btf_dump_base_type_check_zero(struct btf_dump *d, return 0; } -static bool ptr_is_aligned(const void *data, int data_sz) +static bool ptr_is_aligned(const struct btf *btf, __u32 type_id, + const void *data) { - return ((uintptr_t)data) % data_sz == 0; + int alignment = btf__align_of(btf, type_id); + + if (alignment == 0) + return false; + + return ((uintptr_t)data) % alignment == 0; } static int btf_dump_int_data(struct btf_dump *d, @@ -1669,9 +1676,10 @@ static int btf_dump_int_data(struct btf_dump *d, { __u8 encoding = btf_int_encoding(t); bool sign = encoding & BTF_INT_SIGNED; + char buf[16] __attribute__((aligned(16))); int sz = t->size; - if (sz == 0) { + if (sz == 0 || sz > sizeof(buf)) { pr_warn("unexpected size %d for id [%u]\n", sz, type_id); return -EINVAL; } @@ -1679,8 +1687,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* handle packed int data - accesses of integers not aligned on * int boundaries can cause problems on some platforms. */ - if (!ptr_is_aligned(data, sz)) - return btf_dump_bitfield_data(d, t, data, 0, 0); + if (!ptr_is_aligned(d->btf, type_id, data)) { + memcpy(buf, data, sz); + data = buf; + } switch (sz) { case 16: { @@ -1690,10 +1700,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* avoid use of __int128 as some 32-bit platforms do not * support it. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ lsi = ints[0]; msi = ints[1]; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ lsi = ints[1]; msi = ints[0]; #else @@ -1766,7 +1776,7 @@ static int btf_dump_float_data(struct btf_dump *d, int sz = t->size; /* handle unaligned data; copy to local union */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, type_id, data)) { memcpy(&fl, data, sz); flp = &fl; } @@ -1929,7 +1939,7 @@ static int btf_dump_ptr_data(struct btf_dump *d, __u32 id, const void *data) { - if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { + if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) { btf_dump_type_values(d, "%p", *(void **)data); } else { union ptr_data pt; @@ -1949,10 +1959,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { - int sz = t->size; - /* handle unaligned enum value */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, id, data)) { __u64 val; int err; @@ -2215,6 +2223,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_DECL_TAG: err = btf_dump_unsupported_data(d, t, id); break; case BTF_KIND_INT: diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 8df718a6b142..502dea53a742 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -5,6 +5,7 @@ #include <string.h> #include <errno.h> #include <linux/filter.h> +#include <sys/param.h> #include "btf.h" #include "bpf.h" #include "libbpf.h" @@ -12,9 +13,12 @@ #include "hashmap.h" #include "bpf_gen_internal.h" #include "skel_internal.h" +#include <asm/byteorder.h> -#define MAX_USED_MAPS 64 -#define MAX_USED_PROGS 32 +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 +#define MAX_KFUNC_DESCS 256 +#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -29,7 +33,6 @@ */ struct loader_stack { __u32 btf_fd; - __u32 map_fd[MAX_USED_MAPS]; __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; }; @@ -135,16 +138,56 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level) static int add_data(struct bpf_gen *gen, const void *data, __u32 size) { + __u32 size8 = roundup(size, 8); + __u64 zero = 0; void *prev; - if (realloc_data_buf(gen, size)) + if (realloc_data_buf(gen, size8)) return 0; prev = gen->data_cur; - memcpy(gen->data_cur, data, size); - gen->data_cur += size; + if (data) { + memcpy(gen->data_cur, data, size); + memcpy(gen->data_cur + size, &zero, size8 - size); + } else { + memset(gen->data_cur, 0, size8); + } + gen->data_cur += size8; return prev - gen->data_start; } +/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative + * to start of fd_array. Caller can decide if it is usable or not. + */ +static int add_map_fd(struct bpf_gen *gen) +{ + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_maps == MAX_USED_MAPS) { + pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); + gen->error = -E2BIG; + return 0; + } + return gen->nr_maps++; +} + +static int add_kfunc_btf_fd(struct bpf_gen *gen) +{ + int cur; + + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_fd_array == MAX_KFUNC_DESCS) { + cur = add_data(gen, NULL, sizeof(int)); + return (cur - gen->fd_array) / sizeof(int); + } + return MAX_USED_MAPS + gen->nr_fd_array++; +} + +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -166,14 +209,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data) emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); } -/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ -static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) +static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off) { - emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); - emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off)); - emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); } static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, @@ -321,11 +372,11 @@ int bpf_gen__finish(struct bpf_gen *gen) offsetof(struct bpf_prog_desc, prog_fd), 4, stack_off(prog_fd[i])); for (i = 0; i < gen->nr_maps; i++) - move_stack2ctx(gen, - sizeof(struct bpf_loader_ctx) + - sizeof(struct bpf_map_desc) * i + - offsetof(struct bpf_map_desc, map_fd), 4, - stack_off(map_fd[i])); + move_blob2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); pr_debug("gen: finish %d\n", gen->error); @@ -381,11 +432,11 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - struct bpf_create_map_attr *map_attr, int map_idx) + struct bpf_create_map_params *map_attr, int map_idx) { int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; - int map_create_attr; + int map_create_attr, idx; union bpf_attr attr; memset(&attr, 0, attr_size); @@ -393,6 +444,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, attr.key_size = map_attr->key_size; attr.value_size = map_attr->value_size; attr.map_flags = map_attr->map_flags; + attr.map_extra = map_attr->map_extra; memcpy(attr.map_name, map_attr->name, min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = map_attr->numa_node; @@ -462,9 +514,11 @@ void bpf_gen__map_create(struct bpf_gen *gen, gen->error = -EDOM; /* internal bug */ return; } else { - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, - stack_off(map_fd[map_idx]))); - gen->nr_maps++; + /* add_map_fd does gen->nr_maps++ */ + idx = add_map_fd(gen); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, idx))); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0)); } if (close_inner_map_fd) emit_sys_close_stack(gen, stack_off(inner_map_fd)); @@ -506,8 +560,8 @@ static void emit_find_attach_target(struct bpf_gen *gen) */ } -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, - int insn_idx) +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -519,38 +573,292 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, gen->relos = relo; relo += gen->relo_cnt; relo->name = name; + relo->is_weak = is_weak; + relo->is_typeless = is_typeless; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; } -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +/* returns existing ksym_desc with ref incremented, or inserts a new one */ +static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) { - int name, insn, len = strlen(relo->name) + 1; + struct ksym_desc *kdesc; - pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); - name = add_data(gen, relo->name, len); + for (int i = 0; i < gen->nr_ksyms; i++) { + if (!strcmp(gen->ksyms[i].name, relo->name)) { + gen->ksyms[i].ref++; + return &gen->ksyms[i]; + } + } + kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); + if (!kdesc) { + gen->error = -ENOMEM; + return NULL; + } + gen->ksyms = kdesc; + kdesc = &gen->ksyms[gen->nr_ksyms++]; + kdesc->name = relo->name; + kdesc->kind = relo->kind; + kdesc->ref = 1; + kdesc->off = 0; + kdesc->insn = 0; + return kdesc; +} + +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + */ +static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1; + name_off = add_data(gen, relo->name, len); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, name)); + 0, 0, 0, name_off)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); - emit_check_err(gen); +} + +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + * Returns u64 symbol addr in BPF_REG_9 + */ +static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1, res_off; + + name_off = add_data(gen, relo->name, len); + res_off = add_data(gen, NULL, 8); /* res is u64 */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, name_off)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, res_off)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name)); + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + * + * We need to reuse BTF fd for same symbol otherwise each relocation takes a new + * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, + * this would mean a new BTF fd index for each entry. By pairing symbol name + * with index, we get the insn->imm, insn->off pairing that kernel uses for + * kfunc_tab, which becomes the effective limit even though all of them may + * share same index in fd_array (such that kfunc_btf_tab has 1 element). + */ +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + int btf_fd_idx; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing bpf_insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2, + kdesc->insn + offsetof(struct bpf_insn, off)); + goto log; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + if (!relo->is_weak) + emit_check_err(gen); + /* get index in fd_array to store BTF FD at */ + btf_fd_idx = add_kfunc_btf_fd(gen); + if (btf_fd_idx > INT16_MAX) { + pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n", + btf_fd_idx, relo->name); + gen->error = -E2BIG; + return; + } + kdesc->off = btf_fd_idx; + /* set a default value for imm */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + /* skip success case store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); /* store btf_id into insn[insn_idx].imm */ - insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + - offsetof(struct bpf_insn, imm); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, insn)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); - if (relo->kind == BTF_KIND_VAR) { - /* store btf_obj_fd into insn[insn_idx + 1].imm */ - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, - sizeof(struct bpf_insn))); + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); + /* skip store of BTF fd if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); + /* store BTF fd in slot */ + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* set a default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip insn->off store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); + /* skip if vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); + /* store index into insn[insn_idx].off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); +log: + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, + offsetof(struct bpf_insn, off))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d", + relo->name, kdesc->ref); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, kdesc->off))); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0)); + debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd", + relo->name, kdesc->ref); +} + +static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo, + int ref) +{ + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + + offsetof(struct bpf_insn, imm))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d", + relo->is_typeless, relo->is_weak, relo->name, ref); + emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); + debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg", + relo->is_typeless, relo->is_weak, relo->name, ref); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_typeless(struct bpf_gen *gen, + struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + goto log; + } + /* remember insn offset, so we can copy ksym addr later */ + kdesc->insn = insn; + /* skip typeless ksym_desc in fd closing loop in cleanup_relos */ + kdesc->typeless = true; + emit_bpf_kallsyms_lookup_name(gen, relo); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1)); + emit_check_err(gen); + /* store lower half of addr into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm))); + /* store upper half of addr into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); +log: + emit_ksym_relo_log(gen, relo, kdesc->ref); +} + +static __u32 src_reg_mask(void) +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + return 0x0f; /* src_reg,dst_reg,... */ +#elif defined(__BIG_ENDIAN_BITFIELD) + return 0xf0; /* dst_reg,src_reg,... */ +#else +#error "Unsupported bit endianness, cannot proceed" +#endif +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + __u32 reg_mask; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); + /* jump over src_reg adjustment if imm is not 0 */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); + goto clear_src_reg; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + if (!relo->is_weak) + emit_check_err(gen); + /* set default values as 0 */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); + /* skip success case stores if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); + /* store btf_id into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); +clear_src_reg: + /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ + reg_mask = src_reg_mask(); + emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); + emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); + emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); + + emit_ksym_relo_log(gen, relo, kdesc->ref); +} + +static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +{ + int insn; + + pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); + switch (relo->kind) { + case BTF_KIND_VAR: + if (relo->is_typeless) + emit_relo_ksym_typeless(gen, relo, insn); + else + emit_relo_ksym_btf(gen, relo, insn); + break; + case BTF_KIND_FUNC: + emit_relo_kfunc_btf(gen, relo, insn); + break; + default: + pr_warn("Unknown relocation kind '%d'\n", relo->kind); + gen->error = -EDOM; + return; } } @@ -566,14 +874,23 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; - for (i = 0; i < gen->relo_cnt; i++) { - if (gen->relos[i].kind != BTF_KIND_VAR) - continue; - /* close fd recorded in insn[insn_idx + 1].imm */ - insn = insns + - sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + - offsetof(struct bpf_insn, imm); - emit_sys_close_blob(gen, insn); + for (i = 0; i < gen->nr_ksyms; i++) { + /* only close fds for typed ksyms and kfuncs */ + if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) { + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = gen->ksyms[i].insn; + insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); + } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { + emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); + if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) + gen->nr_fd_array--; + } + } + if (gen->nr_ksyms) { + free(gen->ksyms); + gen->nr_ksyms = 0; + gen->ksyms = NULL; } if (gen->relo_cnt) { free(gen->relos); @@ -632,9 +949,8 @@ void bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); - /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ - emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array), - stack_off(map_fd[0])); + /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ + emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, @@ -701,8 +1017,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); map_update_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); emit_rel_store(gen, attr_field(map_update_attr, value), value); /* emit MAP_UPDATE_ELEM command */ @@ -720,8 +1036,8 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) memset(&attr, 0, attr_size); pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); debug_ret(gen, "map_freeze"); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 88d8825fc6f6..a1bea1953df6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -195,6 +195,8 @@ enum kern_feature_id { FEAT_BTF_FLOAT, /* BPF perf link support */ FEAT_PERF_LINK, + /* BTF_KIND_DECL_TAG support */ + FEAT_BTF_DECL_TAG, __FEAT_CNT, }; @@ -218,18 +220,40 @@ struct reloc_desc { struct bpf_sec_def; -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, - struct bpf_program *prog); +typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); +typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); + +/* stored as sec_def->cookie for all libbpf-supported SEC()s */ +enum sec_def_flags { + SEC_NONE = 0, + /* expected_attach_type is optional, if kernel doesn't support that */ + SEC_EXP_ATTACH_OPT = 1, + /* legacy, only used by libbpf_get_type_names() and + * libbpf_attach_type_by_name(), not used by libbpf itself at all. + * This used to be associated with cgroup (and few other) BPF programs + * that were attachable through BPF_PROG_ATTACH command. Pretty + * meaningless nowadays, though. + */ + SEC_ATTACHABLE = 2, + SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, + /* attachment target is specified through BTF ID in either kernel or + * other BPF program's BTF object */ + SEC_ATTACH_BTF = 4, + /* BPF program type allows sleeping/blocking in kernel */ + SEC_SLEEPABLE = 8, + /* allow non-strict prefix matching */ + SEC_SLOPPY_PFX = 16, +}; struct bpf_sec_def { const char *sec; - size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; - bool is_exp_attach_type_optional; - bool is_attachable; - bool is_attach_btf; - bool is_sleepable; + long cookie; + + init_fn_t init_fn; + preload_fn_t preload_fn; attach_fn_t attach_fn; }; @@ -261,7 +285,7 @@ struct bpf_program { size_t sub_insn_off; char *name; - /* sec_name with / replaced by _; makes recursive pinning + /* name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ char *pin_name; @@ -346,15 +370,14 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; -static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = DATA_SEC, - [LIBBPF_MAP_BSS] = BSS_SEC, - [LIBBPF_MAP_RODATA] = RODATA_SEC, - [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, -}; - struct bpf_map { char *name; + /* real_name is defined for special internal maps (.rodata*, + * .data*, .bss, .kconfig) and preserves their original ELF section + * name. This is important to be be able to find corresponding BTF + * DATASEC information. + */ + char *real_name; int fd; int sec_idx; size_t sec_offset; @@ -377,6 +400,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; + __u64 map_extra; }; enum extern_type { @@ -419,6 +443,11 @@ struct extern_desc { /* local btf_id of the ksym extern's type. */ __u32 type_id; + /* BTF fd index to be patched in for insn->off, this is + * 0 for vmlinux BTF, index in obj->fd_array for module + * BTF + */ + __s16 btf_fd_idx; } ksym; }; }; @@ -430,6 +459,41 @@ struct module_btf { char *name; __u32 id; int fd; + int fd_array_idx; +}; + +enum sec_type { + SEC_UNUSED = 0, + SEC_RELO, + SEC_BSS, + SEC_DATA, + SEC_RODATA, +}; + +struct elf_sec_desc { + enum sec_type sec_type; + Elf64_Shdr *shdr; + Elf_Data *data; +}; + +struct elf_state { + int fd; + const void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + Elf64_Ehdr *ehdr; + Elf_Data *symbols; + Elf_Data *st_ops_data; + size_t shstrndx; /* section index for section name strings */ + size_t strtabidx; + struct elf_sec_desc *secs; + int sec_cnt; + int maps_shndx; + int btf_maps_shndx; + __u32 btf_maps_sec_btf_id; + int text_shndx; + int symbols_shndx; + int st_ops_shndx; }; struct bpf_object { @@ -447,47 +511,17 @@ struct bpf_object { struct extern_desc *externs; int nr_extern; int kconfig_map_idx; - int rodata_map_idx; bool loaded; bool has_subcalls; + bool has_rodata; struct bpf_gen *gen_loader; + /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ + struct elf_state efile; /* - * Information when doing elf related work. Only valid if fd - * is valid. - */ - struct { - int fd; - const void *obj_buf; - size_t obj_buf_sz; - Elf *elf; - GElf_Ehdr ehdr; - Elf_Data *symbols; - Elf_Data *data; - Elf_Data *rodata; - Elf_Data *bss; - Elf_Data *st_ops_data; - size_t shstrndx; /* section index for section name strings */ - size_t strtabidx; - struct { - GElf_Shdr shdr; - Elf_Data *data; - } *reloc_sects; - int nr_reloc_sects; - int maps_shndx; - int btf_maps_shndx; - __u32 btf_maps_sec_btf_id; - int text_shndx; - int symbols_shndx; - int data_shndx; - int rodata_shndx; - int bss_shndx; - int st_ops_shndx; - } efile; - /* - * All loaded bpf_object is linked in a list, which is + * All loaded bpf_object are linked in a list, which is * hidden to caller. bpf_objects__<func> handlers deal with * all objects. */ @@ -515,17 +549,22 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; + int *fd_array; + size_t fd_array_cap; + size_t fd_array_cnt; + char path[]; }; -#define obj_elf_valid(o) ((o)->efile.elf) static const char *elf_sym_str(const struct bpf_object *obj, size_t off); static const char *elf_sec_str(const struct bpf_object *obj, size_t off); static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); -static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); +static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); +static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); +static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { @@ -580,7 +619,16 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) { char *name, *p; - name = p = strdup(prog->sec_name); + if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) + name = strdup(prog->name); + else + name = strdup(prog->sec_name); + + if (!name) + return NULL; + + p = name; + while ((p = strchr(p, '/'))) *p = '_'; @@ -667,25 +715,25 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; int nr_progs, err, i; const char *name; - GElf_Sym sym; + Elf64_Sym *sym; progs = obj->programs; nr_progs = obj->nr_programs; - nr_syms = symbols->d_size / sizeof(GElf_Sym); + nr_syms = symbols->d_size / sizeof(Elf64_Sym); sec_off = 0; for (i = 0; i < nr_syms; i++) { - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != sec_idx) + sym = elf_sym_by_idx(obj, i); + + if (sym->st_shndx != sec_idx) continue; - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; - prog_sz = sym.st_size; - sec_off = sym.st_value; + prog_sz = sym->st_size; + sec_off = sym->st_value; - name = elf_sym_str(obj, sym.st_name); + name = elf_sym_str(obj, sym->st_name); if (!name) { pr_warn("sec '%s': failed to get symbol name for offset %zu\n", sec_name, sec_off); @@ -698,7 +746,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); return -ENOTSUP; } @@ -731,9 +779,9 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (GELF_ST_BIND(sym.st_info) != STB_LOCAL - && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN - || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL)) + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL + && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -1101,6 +1149,7 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz, const char *obj_name) { + bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1134,24 +1183,21 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; - obj->efile.data_shndx = -1; - obj->efile.rodata_shndx = -1; - obj->efile.bss_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; - obj->rodata_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; INIT_LIST_HEAD(&obj->list); - list_add(&obj->list, &bpf_objects_list); + if (!strict) + list_add(&obj->list, &bpf_objects_list); return obj; } static void bpf_object__elf_finish(struct bpf_object *obj) { - if (!obj_elf_valid(obj)) + if (!obj->efile.elf) return; if (obj->efile.elf) { @@ -1159,13 +1205,10 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; - obj->efile.data = NULL; - obj->efile.rodata = NULL; - obj->efile.bss = NULL; obj->efile.st_ops_data = NULL; - zfree(&obj->efile.reloc_sects); - obj->efile.nr_reloc_sects = 0; + zfree(&obj->efile.secs); + obj->efile.sec_cnt = 0; zclose(obj->efile.fd); obj->efile.obj_buf = NULL; obj->efile.obj_buf_sz = 0; @@ -1173,10 +1216,11 @@ static void bpf_object__elf_finish(struct bpf_object *obj) static int bpf_object__elf_init(struct bpf_object *obj) { + Elf64_Ehdr *ehdr; int err = 0; - GElf_Ehdr *ep; + Elf *elf; - if (obj_elf_valid(obj)) { + if (obj->efile.elf) { pr_warn("elf: init internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -1186,10 +1230,9 @@ static int bpf_object__elf_init(struct bpf_object *obj) * obj_buf should have been validated by * bpf_object__open_buffer(). */ - obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, - obj->efile.obj_buf_sz); + elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { - obj->efile.fd = open(obj->path, O_RDONLY); + obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { char errmsg[STRERR_BUFSIZE], *cp; @@ -1199,23 +1242,37 @@ static int bpf_object__elf_init(struct bpf_object *obj) return err; } - obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); + elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); } - if (!obj->efile.elf) { + if (!elf) { pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__LIBELF; goto errout; } - if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { + obj->efile.elf = elf; + + if (elf_kind(elf) != ELF_K_ELF) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); + goto errout; + } + + if (gelf_getclass(elf) != ELFCLASS64) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); + goto errout; + } + + obj->efile.ehdr = ehdr = elf64_getehdr(elf); + if (!obj->efile.ehdr) { pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; goto errout; } - ep = &obj->efile.ehdr; - if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1223,7 +1280,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Elf is corrupted/truncated, avoid calling elf_strptr. */ - if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { + if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1231,8 +1288,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Old LLVM set e_machine to EM_NONE */ - if (ep->e_type != ET_REL || - (ep->e_machine && ep->e_machine != EM_BPF)) { + if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; @@ -1246,11 +1302,11 @@ errout: static int bpf_object__check_endianness(struct bpf_object *obj) { -#if __BYTE_ORDER == __LITTLE_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) return 0; -#elif __BYTE_ORDER == __BIG_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) return 0; #else # error "Unrecognized __BYTE_ORDER__" @@ -1290,41 +1346,27 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size) +static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) { int ret = -ENOENT; + Elf_Data *data; + Elf_Scn *scn; *size = 0; - if (!name) { + if (!name) return -EINVAL; - } else if (!strcmp(name, DATA_SEC)) { - if (obj->efile.data) - *size = obj->efile.data->d_size; - } else if (!strcmp(name, BSS_SEC)) { - if (obj->efile.bss) - *size = obj->efile.bss->d_size; - } else if (!strcmp(name, RODATA_SEC)) { - if (obj->efile.rodata) - *size = obj->efile.rodata->d_size; - } else if (!strcmp(name, STRUCT_OPS_SEC)) { - if (obj->efile.st_ops_data) - *size = obj->efile.st_ops_data->d_size; - } else { - Elf_Scn *scn = elf_sec_by_name(obj, name); - Elf_Data *data = elf_sec_data(obj, scn); - if (data) { - ret = 0; /* found it */ - *size = data->d_size; - } + scn = elf_sec_by_name(obj, name); + data = elf_sec_data(obj, scn); + if (data) { + ret = 0; /* found it */ + *size = data->d_size; } return *size ? 0 : ret; } -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off) +static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) { Elf_Data *symbols = obj->efile.symbols; const char *sname; @@ -1333,23 +1375,20 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, if (!name || !off) return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { - GElf_Sym sym; + for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { + Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (!gelf_getsym(symbols, si, &sym)) - continue; - if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || - GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || + ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) continue; - sname = elf_sym_str(obj, sym.st_name); + sname = elf_sym_str(obj, sym->st_name); if (!sname) { - pr_warn("failed to get sym name string for var %s\n", - name); + pr_warn("failed to get sym name string for var %s\n", name); return -EIO; } if (strcmp(name, sname) == 0) { - *off = sym.st_value; + *off = sym->st_value; return 0; } } @@ -1401,17 +1440,55 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map) return map_sz; } -static char *internal_map_name(struct bpf_object *obj, - enum libbpf_map_type type) +static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; - const char *sfx = libbpf_type_to_btf_name[type]; - int sfx_len = max((size_t)7, strlen(sfx)); - int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, - strlen(obj->name)); + int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); + + /* This is one of the more confusing parts of libbpf for various + * reasons, some of which are historical. The original idea for naming + * internal names was to include as much of BPF object name prefix as + * possible, so that it can be distinguished from similar internal + * maps of a different BPF object. + * As an example, let's say we have bpf_object named 'my_object_name' + * and internal map corresponding to '.rodata' ELF section. The final + * map name advertised to user and to the kernel will be + * 'my_objec.rodata', taking first 8 characters of object name and + * entire 7 characters of '.rodata'. + * Somewhat confusingly, if internal map ELF section name is shorter + * than 7 characters, e.g., '.bss', we still reserve 7 characters + * for the suffix, even though we only have 4 actual characters, and + * resulting map will be called 'my_objec.bss', not even using all 15 + * characters allowed by the kernel. Oh well, at least the truncated + * object name is somewhat consistent in this case. But if the map + * name is '.kconfig', we'll still have entirety of '.kconfig' added + * (8 chars) and thus will be left with only first 7 characters of the + * object name ('my_obje'). Happy guessing, user, that the final map + * name will be "my_obje.kconfig". + * Now, with libbpf starting to support arbitrarily named .rodata.* + * and .data.* data sections, it's possible that ELF section name is + * longer than allowed 15 chars, so we now need to be careful to take + * only up to 15 first characters of ELF name, taking no BPF object + * name characters at all. So '.rodata.abracadabra' will result in + * '.rodata.abracad' kernel and user-visible name. + * We need to keep this convoluted logic intact for .data, .bss and + * .rodata maps, but for new custom .data.custom and .rodata.custom + * maps we use their ELF names as is, not prepending bpf_object name + * in front. We still need to truncate them to 15 characters for the + * kernel. Full name can be recovered for such maps by using DATASEC + * BTF type associated with such map's value type, though. + */ + if (sfx_len >= BPF_OBJ_NAME_LEN) + sfx_len = BPF_OBJ_NAME_LEN - 1; + + /* if there are two or more dots in map name, it's a custom dot map */ + if (strchr(real_name + 1, '.') != NULL) + pfx_len = 0; + else + pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, - sfx_len, libbpf_type_to_btf_name[type]); + sfx_len, real_name); /* sanitise map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) @@ -1423,7 +1500,7 @@ static char *internal_map_name(struct bpf_object *obj, static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, void *data, size_t data_sz) + const char *real_name, int sec_idx, void *data, size_t data_sz) { struct bpf_map_def *def; struct bpf_map *map; @@ -1436,9 +1513,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - map->name = internal_map_name(obj, type); - if (!map->name) { - pr_warn("failed to alloc map name\n"); + map->real_name = strdup(real_name); + map->name = internal_map_name(obj, real_name); + if (!map->real_name || !map->name) { + zfree(&map->real_name); + zfree(&map->name); return -ENOMEM; } @@ -1461,6 +1540,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->mmaped = NULL; pr_warn("failed to alloc map '%s' content buffer: %d\n", map->name, err); + zfree(&map->real_name); zfree(&map->name); return err; } @@ -1474,34 +1554,43 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, static int bpf_object__init_global_data_maps(struct bpf_object *obj) { - int err; + struct elf_sec_desc *sec_desc; + const char *sec_name; + int err = 0, sec_idx; /* * Populate obj->maps with libbpf internal maps. */ - if (obj->efile.data_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, - obj->efile.data_shndx, - obj->efile.data->d_buf, - obj->efile.data->d_size); - if (err) - return err; - } - if (obj->efile.rodata_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, - obj->efile.rodata_shndx, - obj->efile.rodata->d_buf, - obj->efile.rodata->d_size); - if (err) - return err; - - obj->rodata_map_idx = obj->nr_maps - 1; - } - if (obj->efile.bss_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, - obj->efile.bss_shndx, - NULL, - obj->efile.bss->d_size); + for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { + sec_desc = &obj->efile.secs[sec_idx]; + + switch (sec_desc->sec_type) { + case SEC_DATA: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, + sec_name, sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; + case SEC_RODATA: + obj->has_rodata = true; + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, + sec_name, sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; + case SEC_BSS: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, + sec_name, sec_idx, + NULL, + sec_desc->data->d_size); + break; + default: + /* skip */ + break; + } if (err) return err; } @@ -1664,7 +1753,7 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, void *ext_val; __u64 num; - if (strncmp(buf, "CONFIG_", 7)) + if (!str_has_pfx(buf, "CONFIG_")) return 0; sep = strchr(buf, '='); @@ -1798,7 +1887,7 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, - obj->efile.symbols_shndx, + ".kconfig", obj->efile.symbols_shndx, NULL, map_sz); if (err) return err; @@ -1836,13 +1925,13 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) * * TODO: Detect array of map and report error. */ - nr_syms = symbols->d_size / sizeof(GElf_Sym); + nr_syms = symbols->d_size / sizeof(Elf64_Sym); for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); - if (!gelf_getsym(symbols, i, &sym)) + if (sym->st_shndx != obj->efile.maps_shndx) continue; - if (sym.st_shndx != obj->efile.maps_shndx) + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) continue; nr_maps++; } @@ -1859,39 +1948,38 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) /* Fill obj->maps using data in "maps" section. */ for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); const char *map_name; struct bpf_map_def *def; struct bpf_map *map; - if (!gelf_getsym(symbols, i, &sym)) + if (sym->st_shndx != obj->efile.maps_shndx) continue; - if (sym.st_shndx != obj->efile.maps_shndx) + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) continue; map = bpf_object__add_map(obj); if (IS_ERR(map)) return PTR_ERR(map); - map_name = elf_sym_str(obj, sym.st_name); + map_name = elf_sym_str(obj, sym->st_name); if (!map_name) { pr_warn("failed to get map #%d name sym string for obj %s\n", i, obj->path); return -LIBBPF_ERRNO__FORMAT; } - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION - || GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; } map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym.st_shndx; - map->sec_offset = sym.st_value; + map->sec_idx = sym->st_shndx; + map->sec_offset = sym->st_value; pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - if (sym.st_value + map_def_sz > data->d_size) { + if (sym->st_value + map_def_sz > data->d_size) { pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", obj->path, map_name); return -EINVAL; @@ -1899,11 +1987,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map->name = strdup(map_name); if (!map->name) { - pr_warn("failed to alloc map name\n"); + pr_warn("map '%s': failed to alloc map name\n", map_name); return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym.st_value); + def = (struct bpf_map_def *)(data->d_buf + sym->st_value); /* * If the definition of the map in the object file fits in * bpf_map_def, copy it. Any extra fields in our version @@ -1987,6 +2075,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; + case BTF_KIND_DECL_TAG: return "decl_tag"; default: return "unknown"; } } @@ -2236,6 +2325,13 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, } map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; + } else if (strcmp(name, "map_extra") == 0) { + __u32 map_extra; + + if (!get_map_field_int(map_name, btf, m, &map_extra)) + return -EINVAL; + map_def->map_extra = map_extra; + map_def->parts |= MAP_DEF_MAP_EXTRA; } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", map_name, name); @@ -2260,6 +2356,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->def.value_size = def->value_size; map->def.max_entries = def->max_entries; map->def.map_flags = def->map_flags; + map->map_extra = def->map_extra; map->numa_node = def->numa_node; map->btf_key_type_id = def->key_type_id; @@ -2283,7 +2380,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def if (def->parts & MAP_DEF_MAX_ENTRIES) pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); if (def->parts & MAP_DEF_MAP_FLAGS) - pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags); + pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); + if (def->parts & MAP_DEF_MAP_EXTRA) + pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, + (unsigned long long)def->map_extra); if (def->parts & MAP_DEF_PINNING) pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); if (def->parts & MAP_DEF_NUMA_NODE) @@ -2420,8 +2520,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return -EINVAL; } - nr_types = btf__get_nr_types(obj->btf); - for (i = 1; i <= nr_types; i++) { + nr_types = btf__type_cnt(obj->btf); + for (i = 1; i < nr_types; i++) { t = btf__type_by_id(obj->btf, i); if (!btf_is_datasec(t)) continue; @@ -2472,12 +2572,13 @@ static int bpf_object__init_maps(struct bpf_object *obj, static bool section_have_execinstr(struct bpf_object *obj, int idx) { - GElf_Shdr sh; + Elf64_Shdr *sh; - if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh)) + sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); + if (!sh) return false; - return sh.sh_flags & SHF_EXECINSTR; + return sh->sh_flags & SHF_EXECINSTR; } static bool btf_needs_sanitization(struct bpf_object *obj) @@ -2486,8 +2587,9 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); - return !has_func || !has_datasec || !has_func_global || !has_float; + return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2496,14 +2598,15 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); struct btf_type *t; int i, j, vlen; - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - if (!has_datasec && btf_is_var(t)) { - /* replace VAR with INT */ + if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { + /* replace VAR/DECL_TAG with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* * using size = 1 is the safest choice, 4 will be too @@ -2610,6 +2713,104 @@ out: return 0; } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = btf_vlen(t); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + const struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + + ret = find_elf_sec_sz(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = btf_var(t_var); + + if (!btf_is_var(t_var)) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = find_elf_var_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", + name); + return -ENOENT; + } + + vsi->offset = off; + } + +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i, n = btf__type_cnt(btf); + + for (i = 1; i < n; i++) { + struct btf_type *t = btf_type_by_id(btf, i); + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. + */ + if (btf_is_datasec(t)) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return libbpf_err(err); +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + return btf_finalize_data(obj, btf); +} + static int bpf_object__finalize_btf(struct bpf_object *obj) { int err; @@ -2617,7 +2818,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) if (!obj->btf) return 0; - err = btf__finalize_data(obj, obj->btf); + err = btf_finalize_data(obj, obj->btf); if (err) { pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); return err; @@ -2727,8 +2928,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) continue; - n = btf__get_nr_types(obj->btf); - for (j = 1; j <= n; j++) { + n = btf__type_cnt(obj->btf); + for (j = 1; j < n; j++) { t = btf_type_by_id(obj->btf, j); if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) continue; @@ -2748,7 +2949,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) __u32 sz; /* clone BTF to sanitize a copy and leave the original intact */ - raw_data = btf__get_raw_data(obj->btf, &sz); + raw_data = btf__raw_data(obj->btf, &sz); kern_btf = btf__new(raw_data, sz); err = libbpf_get_error(kern_btf); if (err) @@ -2761,7 +2962,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (obj->gen_loader) { __u32 raw_size = 0; - const void *raw_data = btf__get_raw_data(kern_btf, &raw_size); + const void *raw_data = btf__raw_data(kern_btf, &raw_size); if (!raw_data) return -ENOMEM; @@ -2853,32 +3054,36 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) return NULL; } -static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr) +static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) { + Elf64_Shdr *shdr; + if (!scn) - return -EINVAL; + return NULL; - if (gelf_getshdr(scn, hdr) != hdr) { + shdr = elf64_getshdr(scn); + if (!shdr) { pr_warn("elf: failed to get section(%zu) header from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); - return -EINVAL; + return NULL; } - return 0; + return shdr; } static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) { const char *name; - GElf_Shdr sh; + Elf64_Shdr *sh; if (!scn) return NULL; - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return NULL; - name = elf_sec_str(obj, sh.sh_name); + name = elf_sec_str(obj, sh->sh_name); if (!name) { pr_warn("elf: failed to get section(%zu) name from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); @@ -2906,13 +3111,29 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } +static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) +{ + if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) + return NULL; + + return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; +} + +static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) +{ + if (idx >= data->d_size / sizeof(Elf64_Rel)) + return NULL; + + return (Elf64_Rel *)data->d_buf + idx; +} + static bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ - return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; + return str_has_pfx(name, ".debug_"); } -static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) +static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name) { /* no special handling of .strtab */ if (hdr->sh_type == SHT_STRTAB) @@ -2931,7 +3152,7 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) if (is_sec_name_dwarf(name)) return true; - if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { + if (str_has_pfx(name, ".rel")) { name += sizeof(".rel") - 1; /* DWARF section relocations */ if (is_sec_name_dwarf(name)) @@ -2960,6 +3181,7 @@ static int cmp_progs(const void *_a, const void *_b) static int bpf_object__elf_collect(struct bpf_object *obj) { + struct elf_sec_desc *sec_desc; Elf *elf = obj->efile.elf; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; @@ -2967,17 +3189,27 @@ static int bpf_object__elf_collect(struct bpf_object *obj) const char *name; Elf_Data *data; Elf_Scn *scn; - GElf_Shdr sh; + Elf64_Shdr *sh; + + /* ELF section indices are 1-based, so allocate +1 element to keep + * indexing simple. Also include 0th invalid section into sec_cnt for + * simpler and more traditional iteration logic. + */ + obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; + obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); + if (!obj->efile.secs) + return -ENOMEM; /* a bunch of ELF parsing functionality depends on processing symbols, * so do the first pass and find the symbol table */ scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; - if (sh.sh_type == SHT_SYMTAB) { + if (sh->sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warn("elf: multiple symbol tables in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; @@ -2987,24 +3219,34 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!data) return -LIBBPF_ERRNO__FORMAT; + idx = elf_ndxscn(scn); + obj->efile.symbols = data; - obj->efile.symbols_shndx = elf_ndxscn(scn); - obj->efile.strtabidx = sh.sh_link; + obj->efile.symbols_shndx = idx; + obj->efile.strtabidx = sh->sh_link; } } + if (!obj->efile.symbols) { + pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", + obj->path); + return -ENOENT; + } + scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - idx++; + idx = elf_ndxscn(scn); + sec_desc = &obj->efile.secs[idx]; - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; - name = elf_sec_str(obj, sh.sh_name); + name = elf_sec_str(obj, sh->sh_name); if (!name) return -LIBBPF_ERRNO__FORMAT; - if (ignore_elf_section(&sh, name)) + if (ignore_elf_section(sh, name)) continue; data = elf_sec_data(obj, scn); @@ -3013,8 +3255,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", idx, name, (unsigned long)data->d_size, - (int)sh.sh_link, (unsigned long)sh.sh_flags, - (int)sh.sh_type); + (int)sh->sh_link, (unsigned long)sh->sh_flags, + (int)sh->sh_type); if (strcmp(name, "license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); @@ -3032,21 +3274,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj) btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; - } else if (sh.sh_type == SHT_SYMTAB) { + } else if (sh->sh_type == SHT_SYMTAB) { /* already processed during the first pass above */ - } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { - if (sh.sh_flags & SHF_EXECINSTR) { + } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh->sh_flags & SHF_EXECINSTR) { if (strcmp(name, ".text") == 0) obj->efile.text_shndx = idx; err = bpf_object__add_programs(obj, data, name, idx); if (err) return err; - } else if (strcmp(name, DATA_SEC) == 0) { - obj->efile.data = data; - obj->efile.data_shndx = idx; - } else if (strcmp(name, RODATA_SEC) == 0) { - obj->efile.rodata = data; - obj->efile.rodata_shndx = idx; + } else if (strcmp(name, DATA_SEC) == 0 || + str_has_pfx(name, DATA_SEC ".")) { + sec_desc->sec_type = SEC_DATA; + sec_desc->shdr = sh; + sec_desc->data = data; + } else if (strcmp(name, RODATA_SEC) == 0 || + str_has_pfx(name, RODATA_SEC ".")) { + sec_desc->sec_type = SEC_RODATA; + sec_desc->shdr = sh; + sec_desc->data = data; } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; @@ -3054,37 +3300,29 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); } - } else if (sh.sh_type == SHT_REL) { - int nr_sects = obj->efile.nr_reloc_sects; - void *sects = obj->efile.reloc_sects; - int sec = sh.sh_info; /* points to other section */ + } else if (sh->sh_type == SHT_REL) { + int targ_sec_idx = sh->sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec) && + if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", - idx, name, sec, - elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>"); + idx, name, targ_sec_idx, + elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); continue; } - sects = libbpf_reallocarray(sects, nr_sects + 1, - sizeof(*obj->efile.reloc_sects)); - if (!sects) - return -ENOMEM; - - obj->efile.reloc_sects = sects; - obj->efile.nr_reloc_sects++; - - obj->efile.reloc_sects[nr_sects].shdr = sh; - obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { - obj->efile.bss = data; - obj->efile.bss_shndx = idx; + sec_desc->sec_type = SEC_RELO; + sec_desc->shdr = sh; + sec_desc->data = data; + } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + sec_desc->sec_type = SEC_BSS; + sec_desc->shdr = sh; + sec_desc->data = data; } else { pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, - (size_t)sh.sh_size); + (size_t)sh->sh_size); } } @@ -3100,19 +3338,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return bpf_object__init_btf(obj, btf_data, btf_ext_data); } -static bool sym_is_extern(const GElf_Sym *sym) +static bool sym_is_extern(const Elf64_Sym *sym) { - int bind = GELF_ST_BIND(sym->st_info); + int bind = ELF64_ST_BIND(sym->st_info); /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ return sym->st_shndx == SHN_UNDEF && (bind == STB_GLOBAL || bind == STB_WEAK) && - GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; + ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; } -static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx) +static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) { - int bind = GELF_ST_BIND(sym->st_info); - int type = GELF_ST_TYPE(sym->st_info); + int bind = ELF64_ST_BIND(sym->st_info); + int type = ELF64_ST_TYPE(sym->st_info); /* in .text section */ if (sym->st_shndx != text_shndx) @@ -3135,8 +3373,8 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) if (!btf) return -ESRCH; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_var(t) && !btf_is_func(t)) @@ -3167,8 +3405,8 @@ static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { if (!btf) return -ESRCH; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_datasec(t)) @@ -3252,8 +3490,8 @@ static int find_int_btf_id(const struct btf *btf) const struct btf_type *t; int i, n; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (btf_is_int(t) && btf_int_bits(t) == 32) @@ -3310,30 +3548,31 @@ static int bpf_object__collect_externs(struct bpf_object *obj) int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; Elf_Scn *scn; - GElf_Shdr sh; + Elf64_Shdr *sh; if (!obj->efile.symbols) return 0; scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; dummy_var_btf_id = add_dummy_ksym_var(obj->btf); if (dummy_var_btf_id < 0) return dummy_var_btf_id; - n = sh.sh_size / sh.sh_entsize; + n = sh->sh_size / sh->sh_entsize; pr_debug("looking for externs among %d symbols...\n", n); for (i = 0; i < n; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); - if (!gelf_getsym(obj->efile.symbols, i, &sym)) + if (!sym) return -LIBBPF_ERRNO__FORMAT; - if (!sym_is_extern(&sym)) + if (!sym_is_extern(sym)) continue; - ext_name = elf_sym_str(obj, sym.st_name); + ext_name = elf_sym_str(obj, sym->st_name); if (!ext_name || !ext_name[0]) continue; @@ -3355,7 +3594,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) t = btf__type_by_id(obj->btf, ext->btf_id); ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; - ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { @@ -3393,11 +3632,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { - if (btf_is_func(t) && ext->is_weak) { - pr_warn("extern weak function %s is unsupported\n", - ext->name); - return -ENOTSUP; - } ksym_sec = sec; ext->type = EXT_KSYM; skip_mods_and_typedefs(obj->btf, t->type, @@ -3565,9 +3799,14 @@ bpf_object__find_program_by_name(const struct bpf_object *obj, static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.data_shndx || - shndx == obj->efile.bss_shndx || - shndx == obj->efile.rodata_shndx; + switch (obj->efile.secs[shndx].sec_type) { + case SEC_BSS: + case SEC_DATA: + case SEC_RODATA: + return true; + default: + return false; + } } static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, @@ -3580,22 +3819,25 @@ static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, static enum libbpf_map_type bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) { - if (shndx == obj->efile.data_shndx) - return LIBBPF_MAP_DATA; - else if (shndx == obj->efile.bss_shndx) + if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; + + switch (obj->efile.secs[shndx].sec_type) { + case SEC_BSS: return LIBBPF_MAP_BSS; - else if (shndx == obj->efile.rodata_shndx) + case SEC_DATA: + return LIBBPF_MAP_DATA; + case SEC_RODATA: return LIBBPF_MAP_RODATA; - else if (shndx == obj->efile.symbols_shndx) - return LIBBPF_MAP_KCONFIG; - else + default: return LIBBPF_MAP_UNSPEC; + } } static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, - const GElf_Sym *sym, const GElf_Rel *rel) + const Elf64_Sym *sym, const Elf64_Rel *rel) { struct bpf_insn *insn = &prog->insns[insn_idx]; size_t map_idx, nr_maps = prog->obj->nr_maps; @@ -3612,7 +3854,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (sym_is_extern(sym)) { - int sym_idx = GELF_R_SYM(rel->r_info); + int sym_idx = ELF64_R_SYM(rel->r_info); int i, n = obj->nr_extern; struct extern_desc *ext; @@ -3725,7 +3967,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; - if (map->libbpf_type != type) + if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) continue; pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", prog->name, map_idx, map->name, map->sec_idx, @@ -3777,9 +4019,8 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, } static int -bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data) +bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { - Elf_Data *symbols = obj->efile.symbols; const char *relo_sec_name, *sec_name; size_t sec_idx = shdr->sh_info; struct bpf_program *prog; @@ -3789,8 +4030,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data __u32 insn_idx; Elf_Scn *scn; Elf_Data *scn_data; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); @@ -3805,33 +4046,36 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", - relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); + relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) { + if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", - relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); + relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel.r_offset / BPF_INSN_SZ; + insn_idx = rel->r_offset / BPF_INSN_SZ; /* relocations against static functions are recorded as * relocations against the section that contains a function; * in such case, symbol will be STT_SECTION and sym.st_name * will point to empty string (0), so fetch section name * instead */ - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0) - sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx)); + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) + sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); else - sym_name = elf_sym_str(obj, sym.st_name); + sym_name = elf_sym_str(obj, sym->st_name); sym_name = sym_name ?: "<?"; pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", @@ -3853,7 +4097,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data /* adjust insn_idx to local BPF program frame of reference */ insn_idx -= prog->sec_insn_off; err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], - insn_idx, sym_name, &sym, &rel); + insn_idx, sym_name, sym, rel); if (err) return err; @@ -3885,8 +4129,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) * LLVM annotates global data differently in BTF, that is, * only as '.data', '.bss' or '.rodata'. */ - ret = btf__find_by_name(obj->btf, - libbpf_type_to_btf_name[map->libbpf_type]); + ret = btf__find_by_name(obj->btf, map->real_name); } if (ret < 0) return ret; @@ -3979,6 +4222,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; map->reused = true; + map->map_extra = info.map_extra; return 0; @@ -4207,6 +4451,23 @@ static int probe_kern_btf_float(void) strs, sizeof(strs))); } +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + static int probe_kern_array_mmap(void) { struct bpf_create_map_attr attr = { @@ -4423,6 +4684,9 @@ static struct kern_feature_desc { [FEAT_PERF_LINK] = { "BPF perf link support", probe_perf_link, }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, }; static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -4473,7 +4737,8 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && map_info.max_entries == map->def.max_entries && - map_info.map_flags == map->def.map_flags); + map_info.map_flags == map->def.map_flags && + map_info.map_extra == map->map_extra); } static int @@ -4556,7 +4821,7 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - struct bpf_create_map_attr create_attr; + struct bpf_create_map_params create_attr; struct bpf_map_def *def = &map->def; int err = 0; @@ -4570,6 +4835,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; + create_attr.map_extra = map->map_extra; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4613,6 +4879,30 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.inner_map_fd = map->inner_map_fd; } + switch (def->type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_RINGBUF: + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; + default: + break; + } + if (obj->gen_loader) { bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. @@ -4620,7 +4910,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b */ map->fd = 0; } else { - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4635,7 +4925,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } err = map->fd < 0 ? -errno : 0; @@ -4812,8 +5102,8 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t targ_essent_len; int n, i; - n = btf__get_nr_types(targ_btf); - for (i = targ_start_id; i <= n; i++) { + n = btf__type_cnt(targ_btf); + for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); if (btf_kind(t) != btf_kind(local_cand->t)) continue; @@ -4988,7 +5278,7 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l err = bpf_core_add_cands(&local_cand, local_essent_len, obj->btf_modules[i].btf, obj->btf_modules[i].name, - btf__get_nr_types(obj->btf_vmlinux) + 1, + btf__type_cnt(obj->btf_vmlinux), cands); if (err) goto err_out; @@ -5132,7 +5422,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * relocated, so it's enough to just subtract in-section offset */ insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx > prog->insns_cnt) + if (insn_idx >= prog->insns_cnt) return -EINVAL; insn = &prog->insns[insn_idx]; @@ -5326,7 +5616,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_EXTERN_FUNC: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; - insn[0].imm = ext->ksym.kernel_btf_id; + if (ext->is_set) { + insn[0].imm = ext->ksym.kernel_btf_id; + insn[0].off = ext->ksym.btf_fd_idx; + } else { /* unresolved weak kfunc */ + insn[0].imm = 0; + insn[0].off = 0; + } break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -5851,10 +6147,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) } static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data); + Elf64_Shdr *shdr, Elf_Data *data); static int bpf_object__collect_map_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data) + Elf64_Shdr *shdr, Elf_Data *data) { const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); int i, j, nrels, new_sz; @@ -5863,10 +6159,9 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; const char *name, *mname; - Elf_Data *symbols; unsigned int moff; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; void *tmp; if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) @@ -5875,28 +6170,30 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, if (!sec) return -EINVAL; - symbols = obj->efile.symbols; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn(".maps relo #%d: failed to get ELF relo\n", i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn(".maps relo #%d: symbol %zx not found\n", - i, (size_t)GELF_R_SYM(rel.r_info)); + i, (size_t)ELF64_R_SYM(rel->r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym.st_name) ?: "<?>"; - if (sym.st_shndx != obj->efile.btf_maps_shndx) { + name = elf_sym_str(obj, sym->st_name) ?: "<?>"; + if (sym->st_shndx != obj->efile.btf_maps_shndx) { pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", i, name); return -LIBBPF_ERRNO__RELOC; } - pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n", - i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value, - (size_t)rel.r_offset, sym.st_name, name); + pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", + i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, + (size_t)rel->r_offset, sym->st_name, name); for (j = 0; j < obj->nr_maps; j++) { map = &obj->maps[j]; @@ -5904,13 +6201,13 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, continue; vi = btf_var_secinfos(sec) + map->btf_var_idx; - if (vi->offset <= rel.r_offset && - rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size) + if (vi->offset <= rel->r_offset && + rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) break; } if (j == obj->nr_maps) { - pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n", - i, name, (size_t)rel.r_offset); + pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", + i, name, (size_t)rel->r_offset); return -EINVAL; } @@ -5937,10 +6234,10 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; - if (rel.r_offset - vi->offset < moff) + if (rel->r_offset - vi->offset < moff) return -EINVAL; - moff = rel.r_offset - vi->offset - moff; + moff = rel->r_offset - vi->offset - moff; /* here we use BPF pointer size, which is always 64 bit, as we * are parsing ELF that was built for BPF target */ @@ -5985,10 +6282,18 @@ static int bpf_object__collect_relos(struct bpf_object *obj) { int i, err; - for (i = 0; i < obj->efile.nr_reloc_sects; i++) { - GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; - Elf_Data *data = obj->efile.reloc_sects[i].data; - int idx = shdr->sh_info; + for (i = 0; i < obj->efile.sec_cnt; i++) { + struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; + Elf64_Shdr *shdr; + Elf_Data *data; + int idx; + + if (sec_desc->sec_type != SEC_RELO) + continue; + + shdr = sec_desc->shdr; + data = sec_desc->data; + idx = shdr->sh_info; if (shdr->sh_type != SHT_REL) { pr_warn("internal error at %d\n", __LINE__); @@ -6064,15 +6369,58 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program return 0; } +static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, + int *btf_obj_fd, int *btf_type_id); + +/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ +static int libbpf_preload_prog(struct bpf_program *prog, + struct bpf_prog_load_params *attr, long cookie) +{ + enum sec_def_flags def = cookie; + + /* old kernels might not support specifying expected_attach_type */ + if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) + attr->expected_attach_type = 0; + + if (def & SEC_SLEEPABLE) + attr->prog_flags |= BPF_F_SLEEPABLE; + + if ((prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + int btf_obj_fd = 0, btf_type_id = 0, err; + const char *attach_name; + + attach_name = strchr(prog->sec_name, '/') + 1; + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); + if (err) + return err; + + /* cache resolved BTF FD and BTF type ID in the prog */ + prog->attach_btf_obj_fd = btf_obj_fd; + prog->attach_btf_id = btf_type_id; + + /* but by now libbpf common logic is not utilizing + * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because + * this callback is called after attrs were populated by + * libbpf, so this callback has to update attr explicitly here + */ + attr->attach_btf_obj_fd = btf_obj_fd; + attr->attach_btf_id = btf_type_id; + } + return 0; +} + static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) { struct bpf_prog_load_params load_attr = {}; + struct bpf_object *obj = prog->obj; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL; - int btf_fd, ret; + int btf_fd, ret, err; if (prog->type == BPF_PROG_TYPE_UNSPEC) { /* @@ -6088,29 +6436,22 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, return -EINVAL; load_attr.prog_type = prog->type; - /* old kernels might not support specifying expected_attach_type */ - if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def && - prog->sec_def->is_exp_attach_type_optional) - load_attr.expected_attach_type = 0; - else - load_attr.expected_attach_type = prog->expected_attach_type; - if (kernel_supports(prog->obj, FEAT_PROG_NAME)) + load_attr.expected_attach_type = prog->expected_attach_type; + if (kernel_supports(obj, FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insn_cnt = insns_cnt; load_attr.license = license; load_attr.attach_btf_id = prog->attach_btf_id; - if (prog->attach_prog_fd) - load_attr.attach_prog_fd = prog->attach_prog_fd; - else - load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) { + btf_fd = bpf_object__btf_fd(obj); + if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -6121,10 +6462,21 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + load_attr.fd_array = obj->fd_array; - if (prog->obj->gen_loader) { - bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, - prog - prog->obj->programs); + /* adjust load_attr if sec_def provides custom preload callback */ + if (prog->sec_def && prog->sec_def->preload_fn) { + err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie); + if (err < 0) { + pr_warn("prog '%s': failed to prepare load attributes: %d\n", + prog->name, err); + return err; + } + } + + if (obj->gen_loader) { + bpf_gen__prog_load(obj->gen_loader, &load_attr, + prog - obj->programs); *pfd = -1; return 0; } @@ -6145,16 +6497,21 @@ retry_load: if (log_buf && load_attr.log_level) pr_debug("verifier log:\n%s", log_buf); - if (prog->obj->rodata_map_idx >= 0 && - kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) { - struct bpf_map *rodata_map = - &prog->obj->maps[prog->obj->rodata_map_idx]; + if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { + struct bpf_map *map; + int i; - if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to bind .rodata map: %s\n", - prog->name, cp); - /* Don't fail hard if can't bind rodata. */ + for (i = 0; i < obj->nr_maps; i++) { + map = &prog->obj->maps[i]; + if (map->libbpf_type != LIBBPF_MAP_RODATA) + continue; + + if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': failed to bind .rodata map: %s\n", + prog->name, cp); + /* Don't fail hard if can't bind rodata. */ + } } } @@ -6218,16 +6575,13 @@ static int bpf_program__record_externs(struct bpf_program *prog) case RELO_EXTERN_VAR: if (ext->type != EXT_KSYM) continue; - if (!ext->ksym.type_id) { - pr_warn("typeless ksym %s is not supported yet\n", - ext->name); - return -ENOTSUP; - } - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, - relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, + ext->is_weak, !ext->ksym.type_id, + BTF_KIND_VAR, relo->insn_idx); break; case RELO_EXTERN_FUNC: - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, + bpf_gen__record_extern(obj->gen_loader, ext->name, + ext->is_weak, false, BTF_KIND_FUNC, relo->insn_idx); break; default: @@ -6237,8 +6591,6 @@ static int bpf_program__record_externs(struct bpf_program *prog) return 0; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); - int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i; @@ -6248,19 +6600,6 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) return libbpf_err(-EINVAL); } - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { - int btf_obj_fd = 0, btf_type_id = 0; - - err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); - if (err) - return libbpf_err(err); - - prog->attach_btf_obj_fd = btf_obj_fd; - prog->attach_btf_id = btf_type_id; - } - if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warn("Internal error: can't load program '%s'\n", @@ -6328,8 +6667,6 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) out: if (err) pr_warn("failed to load program '%s'\n", prog->name); - zfree(&prog->insns); - prog->insns_cnt = 0; return libbpf_err(err); } @@ -6367,12 +6704,51 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) static const struct bpf_sec_def *find_sec_def(const char *sec_name); +static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) +{ + struct bpf_program *prog; + int err; + + bpf_object__for_each_program(prog, obj) { + prog->sec_def = find_sec_def(prog->sec_name); + if (!prog->sec_def) { + /* couldn't guess, but user might manually specify */ + pr_debug("prog '%s': unrecognized ELF section name '%s'\n", + prog->name, prog->sec_name); + continue; + } + + bpf_program__set_type(prog, prog->sec_def->prog_type); + bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || + prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); +#pragma GCC diagnostic pop + + /* sec_def can have custom callback which should be called + * after bpf_program is initialized to adjust its properties + */ + if (prog->sec_def->init_fn) { + err = prog->sec_def->init_fn(prog, prog->sec_def->cookie); + if (err < 0) { + pr_warn("prog '%s': failed to initialize: %d\n", + prog->name, err); + return err; + } + } + } + + return 0; +} + static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { const char *obj_name, *kconfig, *btf_tmp_path; - struct bpf_program *prog; struct bpf_object *obj; char tmp_name[64]; int err; @@ -6430,30 +6806,12 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object__finalize_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); + err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); if (err) goto out; - bpf_object__elf_finish(obj); - bpf_object__for_each_program(prog, obj) { - prog->sec_def = find_sec_def(prog->sec_name); - if (!prog->sec_def) { - /* couldn't guess, but user might manually specify */ - pr_debug("prog '%s': unrecognized ELF section name '%s'\n", - prog->name, prog->sec_name); - continue; - } - - if (prog->sec_def->is_sleepable) - prog->prog_flags |= BPF_F_SLEEPABLE; - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, - prog->sec_def->expected_attach_type); - - if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || - prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) - prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - } + bpf_object__elf_finish(obj); return obj; out: @@ -6529,7 +6887,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); } -int bpf_object__unload(struct bpf_object *obj) +static int bpf_object_unload(struct bpf_object *obj) { size_t i; @@ -6548,6 +6906,8 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); + static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -6621,13 +6981,14 @@ out: static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, __u16 kind, struct btf **res_btf, - int *res_btf_fd) + struct module_btf **res_mod_btf) { - int i, id, btf_fd, err; + struct module_btf *mod_btf; struct btf *btf; + int i, id, err; btf = obj->btf_vmlinux; - btf_fd = 0; + mod_btf = NULL; id = btf__find_by_name_kind(btf, ksym_name, kind); if (id == -ENOENT) { @@ -6636,10 +6997,10 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return err; for (i = 0; i < obj->btf_module_cnt; i++) { - btf = obj->btf_modules[i].btf; - /* we assume module BTF FD is always >0 */ - btf_fd = obj->btf_modules[i].fd; - id = btf__find_by_name_kind(btf, ksym_name, kind); + /* we assume module_btf's BTF FD is always >0 */ + mod_btf = &obj->btf_modules[i]; + btf = mod_btf->btf; + id = btf__find_by_name_kind_own(btf, ksym_name, kind); if (id != -ENOENT) break; } @@ -6648,7 +7009,7 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return -ESRCH; *res_btf = btf; - *res_btf_fd = btf_fd; + *res_mod_btf = mod_btf; return id; } @@ -6657,14 +7018,15 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, { const struct btf_type *targ_var, *targ_type; __u32 targ_type_id, local_type_id; + struct module_btf *mod_btf = NULL; const char *targ_var_name; - int id, btf_fd = 0, err; struct btf *btf = NULL; + int id, err; - id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); - if (id == -ESRCH && ext->is_weak) { - return 0; - } else if (id < 0) { + id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); + if (id < 0) { + if (id == -ESRCH && ext->is_weak) + return 0; pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", ext->name); return id; @@ -6696,7 +7058,7 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, } ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; ext->ksym.kernel_btf_id = id; pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); @@ -6708,26 +7070,22 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, struct extern_desc *ext) { int local_func_proto_id, kfunc_proto_id, kfunc_id; + struct module_btf *mod_btf = NULL; const struct btf_type *kern_func; struct btf *kern_btf = NULL; - int ret, kern_btf_fd = 0; + int ret; local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, - &kern_btf, &kern_btf_fd); + kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); if (kfunc_id < 0) { - pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", + if (kfunc_id == -ESRCH && ext->is_weak) + return 0; + pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", ext->name); return kfunc_id; } - if (kern_btf != obj->btf_vmlinux) { - pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n", - ext->name); - return -ENOTSUP; - } - kern_func = btf__type_by_id(kern_btf, kfunc_id); kfunc_proto_id = kern_func->type; @@ -6739,9 +7097,30 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, return -EINVAL; } + /* set index for module BTF fd in fd_array, if unset */ + if (mod_btf && !mod_btf->fd_array_idx) { + /* insn->off is s16 */ + if (obj->fd_array_cnt == INT16_MAX) { + pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", + ext->name, mod_btf->fd_array_idx); + return -E2BIG; + } + /* Cannot use index 0 for module BTF fd */ + if (!obj->fd_array_cnt) + obj->fd_array_cnt = 1; + + ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), + obj->fd_array_cnt + 1); + if (ret) + return ret; + mod_btf->fd_array_idx = obj->fd_array_cnt; + /* we assume module BTF FD is always >0 */ + obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; + } + ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = kern_btf_fd; ext->ksym.kernel_btf_id = kfunc_id; + ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); @@ -6807,8 +7186,7 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, if (err) return err; pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && - strncmp(ext->name, "CONFIG_", 7) == 0) { + } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { need_config = true; } else if (ext->type == EXT_KSYM) { if (ext->ksym.type_id) @@ -6894,13 +7272,17 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) if (obj->gen_loader) { /* reset FDs */ - btf__set_fd(obj->btf, -1); + if (obj->btf) + btf__set_fd(obj->btf, -1); for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) err = bpf_gen__finish(obj->gen_loader); } + /* clean up fd_array */ + zfree(&obj->fd_array); + /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); @@ -6925,7 +7307,7 @@ out: if (obj->maps[i].pinned && !obj->maps[i].reused) bpf_map__unpin(&obj->maps[i], NULL); - bpf_object__unload(obj); + bpf_object_unload(obj); pr_warn("failed to load object '%s'\n", obj->path); return libbpf_err(err); } @@ -6991,8 +7373,7 @@ static int check_path(const char *path) return err; } -int bpf_program__pin_instance(struct bpf_program *prog, const char *path, - int instance) +static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) { char *cp, errmsg[STRERR_BUFSIZE]; int err; @@ -7027,8 +7408,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, - int instance) +static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) { int err; @@ -7056,6 +7436,12 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return 0; } +__attribute__((alias("bpf_program_pin_instance"))) +int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); + +__attribute__((alias("bpf_program_unpin_instance"))) +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); + int bpf_program__pin(struct bpf_program *prog, const char *path) { int i, err; @@ -7080,7 +7466,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program__pin_instance(prog, path, 0); + return bpf_program_pin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7096,7 +7482,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) goto err_unpin; } - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program_pin_instance(prog, buf, i); if (err) goto err_unpin; } @@ -7114,7 +7500,7 @@ err_unpin: else if (len >= PATH_MAX) continue; - bpf_program__unpin_instance(prog, buf, i); + bpf_program_unpin_instance(prog, buf, i); } rmdir(path); @@ -7142,7 +7528,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program__unpin_instance(prog, path, 0); + return bpf_program_unpin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7155,7 +7541,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) else if (len >= PATH_MAX) return libbpf_err(-ENAMETOOLONG); - err = bpf_program__unpin_instance(prog, buf, i); + err = bpf_program_unpin_instance(prog, buf, i); if (err) return err; } @@ -7516,6 +7902,7 @@ static void bpf_map__destroy(struct bpf_map *map) } zfree(&map->name); + zfree(&map->real_name); zfree(&map->pin_path); if (map->fd >= 0) @@ -7534,7 +7921,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); - bpf_object__unload(obj); + bpf_object_unload(obj); btf__free(obj->btf); btf_ext__free(obj->btf_ext); @@ -7563,6 +7950,10 @@ struct bpf_object * bpf_object__next(struct bpf_object *prev) { struct bpf_object *next; + bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); + + if (strict) + return NULL; if (!prev) next = list_first_entry(&bpf_objects_list, @@ -7669,6 +8060,12 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, struct bpf_program * bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) { + return bpf_object__next_program(obj, prev); +} + +struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) +{ struct bpf_program *prog = prev; do { @@ -7681,6 +8078,12 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) struct bpf_program * bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) { + return bpf_object__prev_program(obj, next); +} + +struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) +{ struct bpf_program *prog = next; do { @@ -7761,6 +8164,16 @@ size_t bpf_program__size(const struct bpf_program *prog) return prog->insns_cnt * BPF_INSN_SZ; } +const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) +{ + return prog->insns; +} + +size_t bpf_program__insn_cnt(const struct bpf_program *prog) +{ + return prog->insns_cnt; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -7868,223 +8281,145 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \ - attachable, attach_btf) \ - { \ - .sec = string, \ - .len = sizeof(string) - 1, \ - .prog_type = ptype, \ - .expected_attach_type = eatype, \ - .is_exp_attach_type_optional = eatype_optional, \ - .is_attachable = attachable, \ - .is_attach_btf = attach_btf, \ - } - -/* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) - -/* Programs that can be attached. */ -#define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0) - -/* Programs that must specify expected attach type at load time. */ -#define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0) - -/* Programs that use BTF to identify attach point */ -#define BPF_PROG_BTF(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1) - -/* Programs that can be attached but attach type can't be identified by section - * name. Kept for backward compatibility. - */ -#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) - -#define SEC_DEF(sec_pfx, ptype, ...) { \ +#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = sec_pfx, \ - .len = sizeof(sec_pfx) - 1, \ .prog_type = BPF_PROG_TYPE_##ptype, \ + .expected_attach_type = atype, \ + .cookie = (long)(flags), \ + .preload_fn = libbpf_preload_prog, \ __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, - struct bpf_program *prog); -static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, - struct bpf_program *prog); +static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); +static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); static const struct bpf_sec_def section_defs[] = { - BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT, - BPF_SK_REUSEPORT_SELECT_OR_MIGRATE), - BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT, - BPF_SK_REUSEPORT_SELECT), - SEC_DEF("kprobe/", KPROBE, - .attach_fn = attach_kprobe), - BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - SEC_DEF("kretprobe/", KPROBE, - .attach_fn = attach_kprobe), - BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), - BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - SEC_DEF("tracepoint/", TRACEPOINT, - .attach_fn = attach_tp), - SEC_DEF("tp/", TRACEPOINT, - .attach_fn = attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, - .attach_fn = attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, - .attach_fn = attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, - .expected_attach_type = BPF_TRACE_RAW_TP, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fentry/", TRACING, - .expected_attach_type = BPF_TRACE_FENTRY, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fmod_ret/", TRACING, - .expected_attach_type = BPF_MODIFY_RETURN, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fexit/", TRACING, - .expected_attach_type = BPF_TRACE_FEXIT, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("fentry.s/", TRACING, - .expected_attach_type = BPF_TRACE_FENTRY, - .is_attach_btf = true, - .is_sleepable = true, - .attach_fn = attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, - .expected_attach_type = BPF_MODIFY_RETURN, - .is_attach_btf = true, - .is_sleepable = true, - .attach_fn = attach_trace), - SEC_DEF("fexit.s/", TRACING, - .expected_attach_type = BPF_TRACE_FEXIT, - .is_attach_btf = true, - .is_sleepable = true, - .attach_fn = attach_trace), - SEC_DEF("freplace/", EXT, - .is_attach_btf = true, - .attach_fn = attach_trace), - SEC_DEF("lsm/", LSM, - .is_attach_btf = true, - .expected_attach_type = BPF_LSM_MAC, - .attach_fn = attach_lsm), - SEC_DEF("lsm.s/", LSM, - .is_attach_btf = true, - .is_sleepable = true, - .expected_attach_type = BPF_LSM_MAC, - .attach_fn = attach_lsm), - SEC_DEF("iter/", TRACING, - .expected_attach_type = BPF_TRACE_ITER, - .is_attach_btf = true, - .attach_fn = attach_iter), - SEC_DEF("syscall", SYSCALL, - .is_sleepable = true), - BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, - BPF_XDP_DEVMAP), - BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, - BPF_XDP_CPUMAP), - BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, - BPF_XDP), - BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), - BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), - BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), - BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), - BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), - BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, - BPF_CGROUP_INET_INGRESS), - BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, - BPF_CGROUP_INET_EGRESS), - BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), - BPF_EAPROG_SEC("cgroup/sock_create", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_CREATE), - BPF_EAPROG_SEC("cgroup/sock_release", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_RELEASE), - BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET_SOCK_CREATE), - BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET4_POST_BIND), - BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK, - BPF_CGROUP_INET6_POST_BIND), - BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE, - BPF_CGROUP_DEVICE), - BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS, - BPF_CGROUP_SOCK_OPS), - BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB, - BPF_SK_SKB_STREAM_PARSER), - BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, - BPF_SK_SKB_STREAM_VERDICT), - BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB), - BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, - BPF_SK_MSG_VERDICT), - BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, - BPF_LIRC_MODE2), - BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR, - BPF_FLOW_DISSECTOR), - BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_BIND), - BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_BIND), - BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_CONNECT), - BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_CONNECT), - BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP4_SENDMSG), - BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP6_SENDMSG), - BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP4_RECVMSG), - BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_UDP6_RECVMSG), - BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_GETPEERNAME), - BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_GETPEERNAME), - BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET4_GETSOCKNAME), - BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - BPF_CGROUP_INET6_GETSOCKNAME), - BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, - BPF_CGROUP_SYSCTL), - BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_CGROUP_GETSOCKOPT), - BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, - BPF_CGROUP_SETSOCKOPT), - BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), - BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, - BPF_SK_LOOKUP), + SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), + SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), + SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), }; -#undef BPF_PROG_SEC_IMPL -#undef BPF_PROG_SEC -#undef BPF_APROG_SEC -#undef BPF_EAPROG_SEC -#undef BPF_APROG_COMPAT -#undef SEC_DEF - #define MAX_TYPE_NAME_SIZE 32 static const struct bpf_sec_def *find_sec_def(const char *sec_name) { - int i, n = ARRAY_SIZE(section_defs); + const struct bpf_sec_def *sec_def; + enum sec_def_flags sec_flags; + int i, n = ARRAY_SIZE(section_defs), len; + bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME; for (i = 0; i < n; i++) { - if (strncmp(sec_name, - section_defs[i].sec, section_defs[i].len)) + sec_def = §ion_defs[i]; + sec_flags = sec_def->cookie; + len = strlen(sec_def->sec); + + /* "type/" always has to have proper SEC("type/extras") form */ + if (sec_def->sec[len - 1] == '/') { + if (str_has_pfx(sec_name, sec_def->sec)) + return sec_def; continue; - return §ion_defs[i]; + } + + /* "type+" means it can be either exact SEC("type") or + * well-formed SEC("type/extras") with proper '/' separator + */ + if (sec_def->sec[len - 1] == '+') { + len--; + /* not even a prefix */ + if (strncmp(sec_name, sec_def->sec, len) != 0) + continue; + /* exact match or has '/' separator */ + if (sec_name[len] == '\0' || sec_name[len] == '/') + return sec_def; + continue; + } + + /* SEC_SLOPPY_PFX definitions are allowed to be just prefix + * matches, unless strict section name mode + * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the + * match has to be exact. + */ + if ((sec_flags & SEC_SLOPPY_PFX) && !strict) { + if (str_has_pfx(sec_name, sec_def->sec)) + return sec_def; + continue; + } + + /* Definitions not marked SEC_SLOPPY_PFX (e.g., + * SEC("syscall")) are exact matches in both modes. + */ + if (strcmp(sec_name, sec_def->sec) == 0) + return sec_def; } return NULL; } @@ -8101,8 +8436,15 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - if (attach_type && !section_defs[i].is_attachable) - continue; + const struct bpf_sec_def *sec_def = §ion_defs[i]; + + if (attach_type) { + if (sec_def->preload_fn != libbpf_preload_prog) + continue; + + if (!(sec_def->cookie & SEC_ATTACHABLE)) + continue; + } if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); @@ -8161,7 +8503,7 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, /* Collect the reloc from ELF and populate the st_ops->progs[] */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data) + Elf64_Shdr *shdr, Elf_Data *data) { const struct btf_member *member; struct bpf_struct_ops *st_ops; @@ -8169,58 +8511,58 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, unsigned int shdr_idx; const struct btf *btf; struct bpf_map *map; - Elf_Data *symbols; unsigned int moff, insn_idx; const char *name; __u32 member_idx; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; int i, nrels; - symbols = obj->efile.symbols; btf = obj->btf; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn("struct_ops reloc: failed to get %d reloc\n", i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn("struct_ops reloc: symbol %zx not found\n", - (size_t)GELF_R_SYM(rel.r_info)); + (size_t)ELF64_R_SYM(rel->r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym.st_name) ?: "<?>"; - map = find_struct_ops_map_by_offset(obj, rel.r_offset); + name = elf_sym_str(obj, sym->st_name) ?: "<?>"; + map = find_struct_ops_map_by_offset(obj, rel->r_offset); if (!map) { - pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", - (size_t)rel.r_offset); + pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", + (size_t)rel->r_offset); return -EINVAL; } - moff = rel.r_offset - map->sec_offset; - shdr_idx = sym.st_shndx; + moff = rel->r_offset - map->sec_offset; + shdr_idx = sym->st_shndx; st_ops = map->st_ops; - pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", map->name, - (long long)(rel.r_info >> 32), - (long long)sym.st_value, - shdr_idx, (size_t)rel.r_offset, - map->sec_offset, sym.st_name, name); + (long long)(rel->r_info >> 32), + (long long)sym->st_value, + shdr_idx, (size_t)rel->r_offset, + map->sec_offset, sym->st_name, name); if (shdr_idx >= SHN_LORESERVE) { - pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", - map->name, (size_t)rel.r_offset, shdr_idx); + pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel->r_offset, shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (sym.st_value % BPF_INSN_SZ) { + if (sym->st_value % BPF_INSN_SZ) { pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", - map->name, (unsigned long long)sym.st_value); + map->name, (unsigned long long)sym->st_value); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = sym.st_value / BPF_INSN_SZ; + insn_idx = sym->st_value / BPF_INSN_SZ; member = find_member_by_offset(st_ops->type, moff * 8); if (!member) { @@ -8244,35 +8586,37 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - if (prog->type == BPF_PROG_TYPE_UNSPEC) { - const struct bpf_sec_def *sec_def; - - sec_def = find_sec_def(prog->sec_name); - if (sec_def && - sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { - /* for pr_warn */ - prog->type = sec_def->prog_type; - goto invalid_prog; - } + /* prevent the use of BPF prog with invalid type */ + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { + pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", + map->name, prog->name); + return -EINVAL; + } - prog->type = BPF_PROG_TYPE_STRUCT_OPS; + /* if we haven't yet processed this BPF program, record proper + * attach_btf_id and member_idx + */ + if (!prog->attach_btf_id) { prog->attach_btf_id = st_ops->type_id; prog->expected_attach_type = member_idx; - } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || - prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - goto invalid_prog; } + + /* struct_ops BPF prog can be re-used between multiple + * .struct_ops as long as it's the same struct_ops struct + * definition and the same function pointer field + */ + if (prog->attach_btf_id != st_ops->type_id || + prog->expected_attach_type != member_idx) { + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", + map->name, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, prog->expected_attach_type, name); + return -EINVAL; + } + st_ops->progs[member_idx] = prog; } return 0; - -invalid_prog: - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; } #define BTF_TRACE_PREFIX "btf_trace_" @@ -8352,28 +8696,27 @@ int libbpf_find_vmlinux_btf_id(const char *name, static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info *info; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); struct btf *btf; int err; - info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); - err = libbpf_get_error(info_linear); + err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed get_prog_info_linear for FD %d\n", - attach_prog_fd); + pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", + attach_prog_fd, err); return err; } err = -EINVAL; - info = &info_linear->info; - if (!info->btf_id) { + if (!info.btf_id) { pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { - pr_warn("Failed to get BTF of the program\n"); + btf = btf__load_from_kernel_by_id(info.btf_id); + err = libbpf_get_error(btf); + if (err) { + pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -8383,7 +8726,6 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) goto out; } out: - free(info_linear); return err; } @@ -8424,32 +8766,12 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, return -ESRCH; } -static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id) +static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, + int *btf_obj_fd, int *btf_type_id) { enum bpf_attach_type attach_type = prog->expected_attach_type; __u32 attach_prog_fd = prog->attach_prog_fd; - const char *name = prog->sec_name, *attach_name; - const struct bpf_sec_def *sec = NULL; - int i, err = 0; - - if (!name) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - if (!section_defs[i].is_attach_btf) - continue; - if (strncmp(name, section_defs[i].sec, section_defs[i].len)) - continue; - - sec = §ion_defs[i]; - break; - } - - if (!sec) { - pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name); - return -ESRCH; - } - attach_name = name + sec->len; + int err = 0; /* BPF program's BTF ID */ if (attach_prog_fd) { @@ -8483,27 +8805,30 @@ int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type) { char *type_names; - int i; + const struct bpf_sec_def *sec_def; if (!name) return libbpf_err(-EINVAL); - for (i = 0; i < ARRAY_SIZE(section_defs); i++) { - if (strncmp(name, section_defs[i].sec, section_defs[i].len)) - continue; - if (!section_defs[i].is_attachable) - return libbpf_err(-EINVAL); - *attach_type = section_defs[i].expected_attach_type; - return 0; - } - pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); - type_names = libbpf_get_type_names(true); - if (type_names != NULL) { - pr_debug("attachable section(type) names are:%s\n", type_names); - free(type_names); + sec_def = find_sec_def(name); + if (!sec_def) { + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_debug("attachable section(type) names are:%s\n", type_names); + free(type_names); + } + + return libbpf_err(-EINVAL); } - return libbpf_err(-EINVAL); + if (sec_def->preload_fn != libbpf_preload_prog) + return libbpf_err(-EINVAL); + if (!(sec_def->cookie & SEC_ATTACHABLE)) + return libbpf_err(-EINVAL); + + *attach_type = sec_def->expected_attach_type; + return 0; } int bpf_map__fd(const struct bpf_map *map) @@ -8516,9 +8841,30 @@ const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) return map ? &map->def : libbpf_err_ptr(-EINVAL); } +static bool map_uses_real_name(const struct bpf_map *map) +{ + /* Since libbpf started to support custom .data.* and .rodata.* maps, + * their user-visible name differs from kernel-visible name. Users see + * such map's corresponding ELF section name as a map name. + * This check distinguishes .data/.rodata from .data.* and .rodata.* + * maps to know which name has to be returned to the user. + */ + if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) + return true; + if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) + return true; + return false; +} + const char *bpf_map__name(const struct bpf_map *map) { - return map ? map->name : NULL; + if (!map) + return NULL; + + if (map_uses_real_name(map)) + return map->real_name; + + return map->name; } enum bpf_map_type bpf_map__type(const struct bpf_map *map) @@ -8547,6 +8893,19 @@ int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) return 0; } +__u64 bpf_map__map_extra(const struct bpf_map *map) +{ + return map->map_extra; +} + +int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) +{ + if (map->fd >= 0) + return libbpf_err(-EBUSY); + map->map_extra = map_extra; + return 0; +} + __u32 bpf_map__numa_node(const struct bpf_map *map) { return map->numa_node; @@ -8701,6 +9060,12 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) { + return bpf_object__next_map(obj, prev); +} + +struct bpf_map * +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) +{ if (prev == NULL) return obj->maps; @@ -8710,6 +9075,12 @@ bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) struct bpf_map * bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) { + return bpf_object__prev_map(obj, next); +} + +struct bpf_map * +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) +{ if (next == NULL) { if (!obj->nr_maps) return NULL; @@ -8725,7 +9096,22 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) struct bpf_map *pos; bpf_object__for_each_map(pos, obj) { - if (pos->name && !strcmp(pos->name, name)) + /* if it's a special internal map name (which always starts + * with dot) then check if that special name matches the + * real map name (ELF section name) + */ + if (name[0] == '.') { + if (pos->real_name && strcmp(pos->real_name, name) == 0) + return pos; + continue; + } + /* otherwise map name has to be an exact match */ + if (map_uses_real_name(pos)) { + if (strcmp(pos->real_name, name) == 0) + return pos; + continue; + } + if (strcmp(pos->name, name) == 0) return pos; } return errno = ENOENT, NULL; @@ -8990,8 +9376,15 @@ int bpf_link__unpin(struct bpf_link *link) struct bpf_link_perf { struct bpf_link link; int perf_event_fd; + /* legacy kprobe support: keep track of probe identifier and type */ + char *legacy_probe_name; + bool legacy_is_kprobe; + bool legacy_is_retprobe; }; +static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); +static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); + static int bpf_link_perf_detach(struct bpf_link *link) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -9004,17 +9397,29 @@ static int bpf_link_perf_detach(struct bpf_link *link) close(perf_link->perf_event_fd); close(link->fd); - return libbpf_err(err); + /* legacy uprobe/kprobe needs to be removed after perf event fd closure */ + if (perf_link->legacy_probe_name) { + if (perf_link->legacy_is_kprobe) { + err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, + perf_link->legacy_is_retprobe); + } else { + err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, + perf_link->legacy_is_retprobe); + } + } + + return err; } static void bpf_link_perf_dealloc(struct bpf_link *link) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + free(perf_link->legacy_probe_name); free(perf_link); } -struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, +struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { char errmsg[STRERR_BUFSIZE]; @@ -9089,7 +9494,7 @@ err_out: return libbpf_err_ptr(err); } -struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) +struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) { return bpf_program__attach_perf_event_opts(prog, pfd, NULL); } @@ -9206,16 +9611,110 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } +static int append_to_file(const char *file, const char *fmt, ...) +{ + int fd, n, err = 0; + va_list ap; + + fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); + if (fd < 0) + return -errno; + + va_start(ap, fmt); + n = vdprintf(fd, fmt, ap); + va_end(ap); + + if (n < 0) + err = -errno; + + close(fd); + return err; +} + +static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, + const char *kfunc_name, size_t offset) +{ + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); +} + +static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, + const char *kfunc_name, size_t offset) +{ + const char *file = "/sys/kernel/debug/tracing/kprobe_events"; + + return append_to_file(file, "%c:%s/%s %s+0x%zx", + retprobe ? 'r' : 'p', + retprobe ? "kretprobes" : "kprobes", + probe_name, kfunc_name, offset); +} + +static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) +{ + const char *file = "/sys/kernel/debug/tracing/kprobe_events"; + + return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name); +} + +static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) +{ + char file[256]; + + snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s/id", + retprobe ? "kretprobes" : "kprobes", probe_name); + + return parse_uint_from_file(file, "%d\n"); +} + +static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, + const char *kfunc_name, size_t offset, int pid) +{ + struct perf_event_attr attr = {}; + char errmsg[STRERR_BUFSIZE]; + int type, pfd, err; + + err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); + if (err < 0) { + pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", + kfunc_name, offset, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + type = determine_kprobe_perf_type_legacy(probe_name, retprobe); + if (type < 0) { + pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", + kfunc_name, offset, + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + return type; + } + attr.size = sizeof(attr); + attr.config = type; + attr.type = PERF_TYPE_TRACEPOINT; + + pfd = syscall(__NR_perf_event_open, &attr, + pid < 0 ? -1 : pid, /* pid */ + pid == -1 ? 0 : -1, /* cpu */ + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warn("legacy kprobe perf_event_open() failed: %s\n", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + return err; + } + return pfd; +} + struct bpf_link * -bpf_program__attach_kprobe_opts(struct bpf_program *prog, +bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE]; + char *legacy_probe = NULL; struct bpf_link *link; - unsigned long offset; - bool retprobe; + size_t offset; + bool retprobe, legacy; int pfd, err; if (!OPTS_VALID(opts, bpf_kprobe_opts)) @@ -9225,27 +9724,57 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog, offset = OPTS_GET(opts, offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, - offset, -1 /* pid */, 0 /* ref_ctr_off */); + legacy = determine_kprobe_perf_type() < 0; + if (!legacy) { + pfd = perf_event_open_probe(false /* uprobe */, retprobe, + func_name, offset, + -1 /* pid */, 0 /* ref_ctr_off */); + } else { + char probe_name[256]; + + gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); + + legacy_probe = strdup(func_name); + if (!legacy_probe) + return libbpf_err_ptr(-ENOMEM); + + pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, + offset, -1 /* pid */); + } if (pfd < 0) { - pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(pfd); + err = -errno; + pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", + func_name, offset, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { close(pfd); - pr_warn("prog '%s': failed to attach to %s '%s': %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", func_name, + pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", + prog->name, retprobe ? "kretprobe" : "kprobe", + func_name, offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(err); + goto err_out; } + if (legacy) { + struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + + perf_link->legacy_probe_name = legacy_probe; + perf_link->legacy_is_kprobe = true; + perf_link->legacy_is_retprobe = retprobe; + } + return link; +err_out: + free(legacy_probe); + return libbpf_err_ptr(err); } -struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, const char *func_name) { @@ -9256,8 +9785,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; @@ -9266,8 +9794,11 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, char *func; int n, err; - func_name = prog->sec_name + sec->len; - opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); + if (opts.retprobe) + func_name = prog->sec_name + sizeof("kretprobe/") - 1; + else + func_name = prog->sec_name + sizeof("kprobe/") - 1; n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { @@ -9288,17 +9819,96 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, return link; } +static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, + const char *binary_path, uint64_t offset) +{ + int i; + + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); + + /* sanitize binary_path in the probe name */ + for (i = 0; buf[i]; i++) { + if (!isalnum(buf[i])) + buf[i] = '_'; + } +} + +static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, + const char *binary_path, size_t offset) +{ + const char *file = "/sys/kernel/debug/tracing/uprobe_events"; + + return append_to_file(file, "%c:%s/%s %s:0x%zx", + retprobe ? 'r' : 'p', + retprobe ? "uretprobes" : "uprobes", + probe_name, binary_path, offset); +} + +static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) +{ + const char *file = "/sys/kernel/debug/tracing/uprobe_events"; + + return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); +} + +static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) +{ + char file[512]; + + snprintf(file, sizeof(file), + "/sys/kernel/debug/tracing/events/%s/%s/id", + retprobe ? "uretprobes" : "uprobes", probe_name); + + return parse_uint_from_file(file, "%d\n"); +} + +static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, + const char *binary_path, size_t offset, int pid) +{ + struct perf_event_attr attr; + int type, pfd, err; + + err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); + if (err < 0) { + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", + binary_path, (size_t)offset, err); + return err; + } + type = determine_uprobe_perf_type_legacy(probe_name, retprobe); + if (type < 0) { + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", + binary_path, offset, err); + return type; + } + + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(attr); + attr.config = type; + attr.type = PERF_TYPE_TRACEPOINT; + + pfd = syscall(__NR_perf_event_open, &attr, + pid < 0 ? -1 : pid, /* pid */ + pid == -1 ? 0 : -1, /* cpu */ + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (pfd < 0) { + err = -errno; + pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); + return err; + } + return pfd; +} + LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, +bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; + char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; - bool retprobe; + bool retprobe, legacy; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -9307,15 +9917,35 @@ bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, - func_offset, pid, ref_ctr_off); + legacy = determine_uprobe_perf_type() < 0; + if (!legacy) { + pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, + func_offset, pid, ref_ctr_off); + } else { + char probe_name[512]; + + if (ref_ctr_off) + return libbpf_err_ptr(-EINVAL); + + gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), + binary_path, func_offset); + + legacy_probe = strdup(probe_name); + if (!legacy_probe) + return libbpf_err_ptr(-ENOMEM); + + pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, + binary_path, func_offset, pid); + } if (pfd < 0) { + err = -errno; pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(pfd); + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto err_out; } + link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { @@ -9324,12 +9954,23 @@ bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return libbpf_err_ptr(err); + goto err_out; + } + if (legacy) { + struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + + perf_link->legacy_probe_name = legacy_probe; + perf_link->legacy_is_kprobe = false; + perf_link->legacy_is_retprobe = retprobe; } return link; +err_out: + free(legacy_probe); + return libbpf_err_ptr(err); + } -struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset) @@ -9389,7 +10030,7 @@ static int perf_event_open_tracepoint(const char *tp_category, return pfd; } -struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, const char *tp_category, const char *tp_name, const struct bpf_tracepoint_opts *opts) @@ -9423,15 +10064,14 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog, return link; } -struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, const char *tp_category, const char *tp_name) { return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) { char *sec_name, *tp_cat, *tp_name; struct bpf_link *link; @@ -9440,8 +10080,11 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, if (!sec_name) return libbpf_err_ptr(-ENOMEM); - /* extract "tp/<category>/<name>" */ - tp_cat = sec_name + sec->len; + /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ + if (str_has_pfx(prog->sec_name, "tp/")) + tp_cat = sec_name + sizeof("tp/") - 1; + else + tp_cat = sec_name + sizeof("tracepoint/") - 1; tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); @@ -9455,7 +10098,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, return link; } -struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name) { char errmsg[STRERR_BUFSIZE]; @@ -9485,16 +10128,34 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) { - const char *tp_name = prog->sec_name + sec->len; + static const char *const prefixes[] = { + "raw_tp/", + "raw_tracepoint/", + "raw_tp.w/", + "raw_tracepoint.w/", + }; + size_t i; + const char *tp_name = NULL; + + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { + if (str_has_pfx(prog->sec_name, prefixes[i])) { + tp_name = prog->sec_name + strlen(prefixes[i]); + break; + } + } + if (!tp_name) { + pr_warn("prog '%s': invalid section name '%s'\n", + prog->name, prog->sec_name); + return libbpf_err_ptr(-EINVAL); + } return bpf_program__attach_raw_tracepoint(prog, tp_name); } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; @@ -9523,30 +10184,28 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) return (struct bpf_link *)link; } -struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { return bpf_program__attach_btf_id(prog); } -struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog) +struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { return bpf_program__attach_btf_id(prog); } -static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie) { return bpf_program__attach_trace(prog); } -static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie) { return bpf_program__attach_lsm(prog); } static struct bpf_link * -bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, +bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id, const char *target_name) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, @@ -9582,24 +10241,24 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, } struct bpf_link * -bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) +bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) { return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); } struct bpf_link * -bpf_program__attach_netns(struct bpf_program *prog, int netns_fd) +bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) { return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); } -struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex) +struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in LINK_CREATE */ return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); } -struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, +struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name) { @@ -9632,7 +10291,7 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, } struct bpf_link * -bpf_program__attach_iter(struct bpf_program *prog, +bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); @@ -9671,21 +10330,17 @@ bpf_program__attach_iter(struct bpf_program *prog, return link; } -static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, - struct bpf_program *prog) +static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie) { return bpf_program__attach_iter(prog, NULL); } -struct bpf_link *bpf_program__attach(struct bpf_program *prog) +struct bpf_link *bpf_program__attach(const struct bpf_program *prog) { - const struct bpf_sec_def *sec_def; - - sec_def = find_sec_def(prog->sec_name); - if (!sec_def || !sec_def->attach_fn) + if (!prog->sec_def || !prog->sec_def->attach_fn) return libbpf_err_ptr(-ESRCH); - return sec_def->attach_fn(sec_def, prog); + return prog->sec_def->attach_fn(prog, prog->sec_def->cookie); } static int bpf_link__detach_struct_ops(struct bpf_link *link) @@ -9698,7 +10353,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link) return 0; } -struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) +struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) { struct bpf_struct_ops *st_ops; struct bpf_link *link; @@ -10511,18 +11166,29 @@ int bpf_program__set_attach_target(struct bpf_program *prog, { int btf_obj_fd = 0, btf_id = 0, err; - if (!prog || attach_prog_fd < 0 || !attach_func_name) + if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); if (prog->obj->loaded) return libbpf_err(-EINVAL); + if (attach_prog_fd && !attach_func_name) { + /* remember attach_prog_fd and let bpf_program__load() find + * BTF ID during the program load + */ + prog->attach_prog_fd = attach_prog_fd; + return 0; + } + if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { + if (!attach_func_name) + return libbpf_err(-EINVAL); + /* load btf_vmlinux, if not yet */ err = bpf_object__load_vmlinux_btf(prog->obj, true); if (err) @@ -10595,7 +11261,7 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) int fd, err = 0, len; char buf[128]; - fd = open(fcpu, O_RDONLY); + fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); @@ -10764,16 +11430,15 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->prog_cnt; i++) { struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; - const struct bpf_sec_def *sec_def; if (!prog->load) continue; - sec_def = find_sec_def(prog->sec_name); - if (!sec_def || !sec_def->attach_fn) + /* auto-attaching not supported for this program */ + if (!prog->sec_def || !prog->sec_def->attach_fn) continue; - *link = sec_def->attach_fn(sec_def, prog); + *link = bpf_program__attach(prog); err = libbpf_get_error(*link); if (err) { pr_warn("failed to auto-attach program '%s': %d\n", diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index f177d897c5f7..9de0f299706b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -83,12 +83,15 @@ struct bpf_object_open_opts { * Non-relocatable instructions are replaced with invalid ones to * prevent accidental errors. * */ + LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect") bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be * auto-pinned to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; + + LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program") __u32 attach_prog_fd; /* Additional kernel config content that augments and overrides * system Kconfig for CONFIG_xxx externs. @@ -147,6 +150,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); +LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); @@ -164,7 +168,8 @@ LIBBPF_API struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name); -LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead") +struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ (tmp) = bpf_object__next(pos); \ @@ -186,16 +191,22 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") +struct bpf_program *bpf_program__next(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); -#define bpf_object__for_each_program(pos, obj) \ - for ((pos) = bpf_program__next(NULL, (obj)); \ - (pos) != NULL; \ - (pos) = bpf_program__next((pos), (obj))) +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_object__next_program((obj), NULL); \ + (pos) != NULL; \ + (pos) = bpf_object__next_program((obj), (pos))) -LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") +struct bpf_program *bpf_program__prev(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -214,14 +225,51 @@ LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); /* returns program size in bytes */ +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead") LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); +struct bpf_insn; + +/** + * @brief **bpf_program__insns()** gives read-only access to BPF program's + * underlying BPF instructions. + * @param prog BPF program for which to return instructions + * @return a pointer to an array of BPF instructions that belong to the + * specified BPF program + * + * Returned pointer is always valid and not NULL. Number of `struct bpf_insn` + * pointed to can be fetched using **bpf_program__insn_cnt()** API. + * + * Keep in mind, libbpf can modify and append/delete BPF program's + * instructions as it processes BPF object file and prepares everything for + * uploading into the kernel. So depending on the point in BPF object + * lifetime, **bpf_program__insns()** can return different sets of + * instructions. As an example, during BPF object load phase BPF program + * instructions will be CO-RE-relocated, BPF subprograms instructions will be + * appended, ldimm64 instructions will have FDs embedded, etc. So instructions + * returned before **bpf_object__load()** and after it might be quite + * different. + */ +LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); +/** + * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s + * that form specified BPF program. + * @param prog BPF program for which to return number of BPF instructions + * + * See **bpf_program__insns()** documentation for notes on how libbpf can + * change instructions and their count during different phases of + * **bpf_object** lifetime. + */ +LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); @@ -243,7 +291,7 @@ LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * -bpf_program__attach(struct bpf_program *prog); +bpf_program__attach(const struct bpf_program *prog); struct bpf_perf_event_opts { /* size of this struct, for forward/backward compatiblity */ @@ -254,10 +302,10 @@ struct bpf_perf_event_opts { #define bpf_perf_event_opts__last_field bpf_cookie LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); +bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); LIBBPF_API struct bpf_link * -bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, +bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts); struct bpf_kprobe_opts { @@ -266,7 +314,7 @@ struct bpf_kprobe_opts { /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; /* function's offset to install kprobe to */ - unsigned long offset; + size_t offset; /* kprobe is return probe */ bool retprobe; size_t :0; @@ -274,10 +322,10 @@ struct bpf_kprobe_opts { #define bpf_kprobe_opts__last_field retprobe LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, +bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, const char *func_name); LIBBPF_API struct bpf_link * -bpf_program__attach_kprobe_opts(struct bpf_program *prog, +bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts); @@ -297,11 +345,11 @@ struct bpf_uprobe_opts { #define bpf_uprobe_opts__last_field retprobe LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, +bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); LIBBPF_API struct bpf_link * -bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, +bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); @@ -314,35 +362,35 @@ struct bpf_tracepoint_opts { #define bpf_tracepoint_opts__last_field bpf_cookie LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint(struct bpf_program *prog, +bpf_program__attach_tracepoint(const struct bpf_program *prog, const char *tp_category, const char *tp_name); LIBBPF_API struct bpf_link * -bpf_program__attach_tracepoint_opts(struct bpf_program *prog, +bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, const char *tp_category, const char *tp_name, const struct bpf_tracepoint_opts *opts); LIBBPF_API struct bpf_link * -bpf_program__attach_raw_tracepoint(struct bpf_program *prog, +bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); LIBBPF_API struct bpf_link * -bpf_program__attach_trace(struct bpf_program *prog); +bpf_program__attach_trace(const struct bpf_program *prog); LIBBPF_API struct bpf_link * -bpf_program__attach_lsm(struct bpf_program *prog); +bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * -bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd); +bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); LIBBPF_API struct bpf_link * -bpf_program__attach_netns(struct bpf_program *prog, int netns_fd); +bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd); LIBBPF_API struct bpf_link * -bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); +bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); LIBBPF_API struct bpf_link * -bpf_program__attach_freplace(struct bpf_program *prog, +bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name); struct bpf_map; -LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); +LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -352,11 +400,9 @@ struct bpf_iter_attach_opts { #define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * -bpf_program__attach_iter(struct bpf_program *prog, +bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); -struct bpf_insn; - /* * Libbpf allows callers to adjust BPF programs before being loaded * into kernel. One program in an object file can be transformed into @@ -385,7 +431,7 @@ struct bpf_insn; * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). */ - +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") struct bpf_prog_prep_result { /* * If not NULL, load new instruction array. @@ -414,9 +460,11 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, struct bpf_prog_prep_result *res); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, bpf_program_prep_t prep); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* @@ -478,9 +526,13 @@ struct bpf_map_def { unsigned int map_flags; }; -/* - * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, - * so no need to worry about a name clash. +/** + * @brief **bpf_object__find_map_by_name()** returns BPF map of + * the given name, if it exists within the passed BPF object + * @param obj BPF object + * @param name name of the BPF map + * @return BPF map instance, if such map exists within the BPF object; + * or NULL otherwise. */ LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); @@ -495,18 +547,28 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") +struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); + #define bpf_object__for_each_map(pos, obj) \ - for ((pos) = bpf_map__next(NULL, (obj)); \ + for ((pos) = bpf_object__next_map((obj), NULL); \ (pos) != NULL; \ - (pos) = bpf_map__next((pos), (obj))) + (pos) = bpf_object__next_map((obj), (pos))) #define bpf_map__for_each bpf_object__for_each_map +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") +struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); -/* get/set map FD */ +/** + * @brief **bpf_map__fd()** gets the file descriptor of the passed + * BPF map + * @param map the BPF map instance + * @return the file descriptor; or -EINVAL in case of an error + */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); /* get map definition */ @@ -538,6 +600,9 @@ LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); /* get/set map if_index */ LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +/* get/set map map_extra flags */ +LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, @@ -547,6 +612,14 @@ LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); + +/** + * @brief **bpf_map__is_internal()** tells the caller whether or not the + * passed map is a special map created by libbpf automatically for things like + * global variables, __ksym externs, Kconfig values, etc + * @param map the bpf_map + * @return true, if the map is an internal map; false, otherwise + */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); @@ -558,6 +631,38 @@ LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); +/** + * @brief **libbpf_get_error()** extracts the error code from the passed + * pointer + * @param ptr pointer returned from libbpf API function + * @return error code; or 0 if no error occured + * + * Many libbpf API functions which return pointers have logic to encode error + * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()** + * should be used on the return value from these functions immediately after + * calling the API function, with no intervening calls that could clobber the + * `errno` variable. Consult the individual functions documentation to verify + * if this logic applies should be used. + * + * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` + * is enabled, NULL is returned on error instead. + * + * If ptr is NULL, then errno should be already set by the failing + * API, because libbpf never returns NULL on success and it now always + * sets errno on error. + * + * Example usage: + * + * struct perf_buffer *pb; + * + * pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts); + * err = libbpf_get_error(pb); + * if (err) { + * pb = NULL; + * fprintf(stderr, "failed to open perf buffer: %d\n", err); + * goto cleanup; + * } + */ LIBBPF_API long libbpf_get_error(const void *ptr); struct bpf_prog_load_attr { @@ -822,9 +927,10 @@ bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); LIBBPF_API void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); -/* - * A helper function to get the number of possible CPUs before looking up - * per-CPU maps. Negative errno is returned on failure. +/** + * @brief **libbpf_num_possible_cpus()** is a helper function to get the + * number of possible CPUs that the host kernel supports and expects. + * @return number of possible CPUs; or error code on failure * * Example usage: * @@ -834,7 +940,6 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); * } * long values[ncpus]; * bpf_map_lookup_elem(per_cpu_map_fd, key, values); - * */ LIBBPF_API int libbpf_num_possible_cpus(void); @@ -854,7 +959,7 @@ struct bpf_object_skeleton { size_t sz; /* size of this struct, for forward/backward compatibility */ const char *name; - void *data; + const void *data; size_t data_sz; struct bpf_object **obj; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index bbc53bb25f68..43580eb47740 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -386,3 +386,19 @@ LIBBPF_0.5.0 { btf_dump__dump_type_data; libbpf_set_strict_mode; } LIBBPF_0.4.0; + +LIBBPF_0.6.0 { + global: + bpf_map__map_extra; + bpf_map__set_map_extra; + bpf_object__next_map; + bpf_object__next_program; + bpf_object__prev_map; + bpf_object__prev_program; + bpf_program__insn_cnt; + bpf_program__insns; + btf__add_btf; + btf__add_decl_tag; + btf__raw_data; + btf__type_cnt; +} LIBBPF_0.5.0; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index 947d8bd8a7bb..aaa1efbf6f51 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_COMMON_H #include <string.h> +#include "libbpf_version.h" #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) @@ -17,6 +18,29 @@ #define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg))) +/* Mark a symbol as deprecated when libbpf version is >= {major}.{minor} */ +#define LIBBPF_DEPRECATED_SINCE(major, minor, msg) \ + __LIBBPF_MARK_DEPRECATED_ ## major ## _ ## minor \ + (LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg)) + +#define __LIBBPF_CURRENT_VERSION_GEQ(major, minor) \ + (LIBBPF_MAJOR_VERSION > (major) || \ + (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor))) + +/* Add checks for other versions below when planning deprecation of API symbols + * with the LIBBPF_DEPRECATED_SINCE macro. + */ +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6) +#define __LIBBPF_MARK_DEPRECATED_0_6(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_6(X) +#endif +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7) +#define __LIBBPF_MARK_DEPRECATED_0_7(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_7(X) +#endif + /* Helper macro to declare and initialize libbpf options struct * * This dance with uninitialized declaration, followed by memset to zero, diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 533b0211f40a..aeb79e3a8ff9 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -13,6 +13,8 @@ #include <limits.h> #include <errno.h> #include <linux/err.h> +#include <fcntl.h> +#include <unistd.h> #include "libbpf_legacy.h" #include "relo_core.h" @@ -52,8 +54,8 @@ #endif /* Older libelf all end up in this expression, for both 32 and 64 bit */ -#ifndef GELF_ST_VISIBILITY -#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#ifndef ELF64_ST_VISIBILITY +#define ELF64_ST_VISIBILITY(o) ((o) & 0x03) #endif #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -69,6 +71,8 @@ #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) +#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) @@ -87,20 +91,40 @@ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +/* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is + * a string literal known at compilation time or char * pointer known only at + * runtime. + */ +#define str_has_pfx(str, pfx) \ + (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) + /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. + * Starting with GNU C 10, use symver attribute instead of .symver assembler + * directive, which works better with GCC LTO builds. */ -#ifdef SHARED -# define COMPAT_VERSION(internal_name, api_name, version) \ +#if defined(SHARED) && defined(__GNUC__) && __GNUC__ >= 10 + +#define DEFAULT_VERSION(internal_name, api_name, version) \ + __attribute__((symver(#api_name "@@" #version))) +#define COMPAT_VERSION(internal_name, api_name, version) \ + __attribute__((symver(#api_name "@" #version))) + +#elif defined(SHARED) + +#define COMPAT_VERSION(internal_name, api_name, version) \ asm(".symver " #internal_name "," #api_name "@" #version); -# define DEFAULT_VERSION(internal_name, api_name, version) \ +#define DEFAULT_VERSION(internal_name, api_name, version) \ asm(".symver " #internal_name "," #api_name "@@" #version); -#else -# define COMPAT_VERSION(internal_name, api_name, version) -# define DEFAULT_VERSION(internal_name, api_name, version) \ + +#else /* !SHARED */ + +#define COMPAT_VERSION(internal_name, api_name, version) +#define DEFAULT_VERSION(internal_name, api_name, version) \ extern typeof(internal_name) api_name \ __attribute__((alias(#internal_name))); + #endif extern void libbpf_print(enum libbpf_print_level level, @@ -171,8 +195,9 @@ enum map_def_parts { MAP_DEF_NUMA_NODE = 0x080, MAP_DEF_PINNING = 0x100, MAP_DEF_INNER_MAP = 0x200, + MAP_DEF_MAP_EXTRA = 0x400, - MAP_DEF_ALL = 0x3ff, /* combination of all above */ + MAP_DEF_ALL = 0x7ff, /* combination of all above */ }; struct btf_map_def { @@ -186,6 +211,7 @@ struct btf_map_def { __u32 map_flags; __u32 numa_node; __u32 pinning; + __u64 map_extra; }; int parse_btf_map_def(const char *map_name, struct btf *btf, @@ -276,14 +302,32 @@ struct bpf_prog_load_params { __u32 log_level; char *log_buf; size_t log_buf_sz; + int *fd_array; }; int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); +struct bpf_create_map_params { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; + __u64 map_extra; +}; + +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); + struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, const char **prefix, int *kind); @@ -386,6 +430,8 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind); extern enum libbpf_strict_mode libbpf_mode; @@ -447,4 +493,26 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 + * Takes ownership of the fd passed in, and closes it if calling + * fcntl(fd, F_DUPFD_CLOEXEC, 3). + */ +static inline int ensure_good_fd(int fd) +{ + int old_fd = fd, saved_errno; + + if (fd < 0) + return fd; + if (fd < 3) { + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + saved_errno = errno; + close(old_fd); + if (fd < 0) { + pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno); + errno = saved_errno; + } + } + return fd; +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index df0d03dcffab..5ba5c9beccfa 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -46,6 +46,24 @@ enum libbpf_strict_mode { */ LIBBPF_STRICT_DIRECT_ERRS = 0x02, + /* + * Enforce strict BPF program section (SEC()) names. + * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were + * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become + * unrecognized by libbpf and would have to be just SEC("xdp") and + * SEC("xdp") and SEC("perf_event"). + * + * Note, in this mode the program pin path will be based on the + * function name instead of section name. + */ + LIBBPF_STRICT_SEC_NAME = 0x04, + /* + * Disable the global 'bpf_objects_list'. Maintaining this list adds + * a race condition to bpf_object__open() and bpf_object__close(). + * Clients can maintain it on their own if it is valuable for them. + */ + LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, + __LIBBPF_STRICT_LAST, }; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index cd8c703dde71..68f2dbf364aa 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -33,7 +33,7 @@ static int get_vendor_id(int ifindex) snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) return -1; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h new file mode 100644 index 000000000000..dd56d76f291c --- /dev/null +++ b/tools/lib/bpf/libbpf_version.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (C) 2021 Facebook */ +#ifndef __LIBBPF_VERSION_H +#define __LIBBPF_VERSION_H + +#define LIBBPF_MAJOR_VERSION 0 +#define LIBBPF_MINOR_VERSION 6 + +#endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 2df880cefdae..f677dccdeae4 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -15,7 +15,6 @@ #include <linux/btf.h> #include <elf.h> #include <libelf.h> -#include <gelf.h> #include <fcntl.h> #include "libbpf.h" #include "btf.h" @@ -302,7 +301,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) if (!linker->filename) return -ENOMEM; - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644); + linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (linker->fd < 0) { err = -errno; pr_warn("failed to create '%s': %d\n", file, err); @@ -324,12 +323,12 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; #else -#error "Unknown __BYTE_ORDER" +#error "Unknown __BYTE_ORDER__" #endif /* STRTAB */ @@ -539,12 +538,12 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ const int host_endianness = ELFDATA2MSB; #else -#error "Unknown __BYTE_ORDER" +#error "Unknown __BYTE_ORDER__" #endif int err = 0; Elf_Scn *scn; @@ -557,7 +556,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->filename = filename; - obj->fd = open(filename, O_RDONLY); + obj->fd = open(filename, O_RDONLY | O_CLOEXEC); if (obj->fd < 0) { err = -errno; pr_warn("failed to open file '%s': %d\n", filename, err); @@ -921,7 +920,7 @@ static int check_btf_type_id(__u32 *type_id, void *ctx) { struct btf *btf = ctx; - if (*type_id > btf__get_nr_types(btf)) + if (*type_id >= btf__type_cnt(btf)) return -EINVAL; return 0; @@ -948,8 +947,8 @@ static int linker_sanity_check_btf(struct src_obj *obj) if (!obj->btf) return 0; - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { t = btf_type_by_id(obj->btf, i); err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); @@ -1659,8 +1658,8 @@ static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sy return -EINVAL; } - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(obj->btf, i); /* some global and extern FUNCs and VARs might not be associated with any @@ -2131,8 +2130,8 @@ static int linker_fixup_btf(struct src_obj *obj) if (!obj->btf) return 0; - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { struct btf_var_secinfo *vi; struct btf_type *t; @@ -2235,14 +2234,14 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (!obj->btf) return 0; - start_id = btf__get_nr_types(linker->btf) + 1; - n = btf__get_nr_types(obj->btf); + start_id = btf__type_cnt(linker->btf); + n = btf__type_cnt(obj->btf); obj->btf_type_map = calloc(n + 1, sizeof(int)); if (!obj->btf_type_map) return -ENOMEM; - for (i = 1; i <= n; i++) { + for (i = 1; i < n; i++) { struct glob_sym *glob_sym = NULL; t = btf__type_by_id(obj->btf, i); @@ -2297,8 +2296,8 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) } /* remap all the types except DATASECs */ - n = btf__get_nr_types(linker->btf); - for (i = start_id; i <= n; i++) { + n = btf__type_cnt(linker->btf); + for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) @@ -2657,7 +2656,7 @@ static int finalize_btf(struct bpf_linker *linker) __u32 raw_sz; /* bail out if no BTF data was produced */ - if (btf__get_nr_types(linker->btf) == 0) + if (btf__type_cnt(linker->btf) == 1) return 0; for (i = 1; i < linker->sec_cnt; i++) { @@ -2694,7 +2693,7 @@ static int finalize_btf(struct bpf_linker *linker) } /* Emit .BTF section */ - raw_data = btf__get_raw_data(linker->btf, &raw_sz); + raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) return -ENOMEM; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 4016ed492d0c..b5b8956a1be8 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -662,7 +662,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = true; /* signedness is never ambiguous */ break; case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index b22b50c1b173..9cf66702fa8d 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -105,10 +105,12 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); if (err < 0 || (int)attr.test.retval < 0) { opts->errstr = "failed to execute loader prog"; - if (err < 0) + if (err < 0) { err = -errno; - else + } else { err = (int)attr.test.retval; + errno = -err; + } goto out; } err = 0; diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index 1fb8b49de1d6..ea655318153f 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -88,6 +88,7 @@ void strset__free(struct strset *set) hashmap__free(set->strs_hash); free(set->strs_data); + free(set); } size_t strset__data_size(const struct strset *set) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e9b619aa0cdf..81f8fbc85e70 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -281,6 +281,7 @@ out_mmap: return err; } +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -299,7 +300,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, if (!umem) return -ENOMEM; - umem->fd = socket(AF_XDP, SOCK_RAW, 0); + umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (umem->fd < 0) { err = -errno; goto out_umem_alloc; @@ -345,6 +346,7 @@ struct xsk_umem_config_v1 { __u32 frame_headroom; }; +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -358,8 +360,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, &config); } -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static enum xsk_prog get_xsk_prog(void) { @@ -549,7 +549,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_LOCAL, SOCK_DGRAM, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (fd < 0) return -errno; @@ -1046,7 +1046,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, } if (umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (xsk->fd < 0) { err = -errno; goto out_xsk_alloc; diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 01c12dca9c10..64e9c57fd792 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -23,6 +23,12 @@ extern "C" { #endif +/* This whole API has been deprecated and moved to libxdp that can be found at + * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so + * it should just be linking with libxdp instead of libbpf for this set of + * functionality. If not, please submit a bug report on the aforementioned page. + */ + /* Load-Acquire Store-Release barriers used by the XDP socket * library. The following macros should *NOT* be considered part of * the xsk.h API, and is subject to change anytime. @@ -245,8 +251,10 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); } -LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 @@ -263,10 +271,10 @@ struct xsk_umem_config { __u32 flags; }; -LIBBPF_API int xsk_setup_xdp_prog(int ifindex, - int *xsks_map_fd); -LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, - int xsks_map_fd); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); /* Flags for the libbpf_flags field. */ #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) @@ -280,40 +288,46 @@ struct xsk_socket_config { }; /* Set config to NULL to get the default configuration. */ -LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); -LIBBPF_API int -xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create_v0_0_2(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create_v0_0_4(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. */ -LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +void xsk_socket__delete(struct xsk_socket *xsk); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c new file mode 100644 index 000000000000..10c067e3a8d2 --- /dev/null +++ b/tools/lib/list_sort.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/string.h> +#include <linux/list_sort.h> +#include <linux/list.h> + +/* + * Returns a list organized in an intermediate format suited + * to chaining of merge() calls: null-terminated, no reserved or + * sentinel head node, "prev" links not maintained. + */ +__attribute__((nonnull(2,3,4))) +static struct list_head *merge(void *priv, list_cmp_func_t cmp, + struct list_head *a, struct list_head *b) +{ + struct list_head *head, **tail = &head; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ + if (cmp(priv, a, b) <= 0) { + *tail = a; + tail = &a->next; + a = a->next; + if (!a) { + *tail = b; + break; + } + } else { + *tail = b; + tail = &b->next; + b = b->next; + if (!b) { + *tail = a; + break; + } + } + } + return head; +} + +/* + * Combine final list merge with restoration of standard doubly-linked + * list structure. This approach duplicates code from merge(), but + * runs faster than the tidier alternatives of either a separate final + * prev-link restoration pass, or maintaining the prev links + * throughout. + */ +__attribute__((nonnull(2,3,4,5))) +static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, + struct list_head *a, struct list_head *b) +{ + struct list_head *tail = head; + u8 count = 0; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ + if (cmp(priv, a, b) <= 0) { + tail->next = a; + a->prev = tail; + tail = a; + a = a->next; + if (!a) + break; + } else { + tail->next = b; + b->prev = tail; + tail = b; + b = b->next; + if (!b) { + b = a; + break; + } + } + } + + /* Finish linking remainder of list b on to tail */ + tail->next = b; + do { + /* + * If the merge is highly unbalanced (e.g. the input is + * already sorted), this loop may run many iterations. + * Continue callbacks to the client even though no + * element comparison is needed, so the client's cmp() + * routine can invoke cond_resched() periodically. + */ + if (unlikely(!++count)) + cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; + } while (b); + + /* And the final links to make a circular doubly-linked list */ + tail->next = head; + head->prev = tail; +} + +/** + * list_sort - sort a list + * @priv: private data, opaque to list_sort(), passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * The comparison function @cmp must return > 0 if @a should sort after + * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should + * sort before @b *or* their original order should be preserved. It is + * always called with the element that came first in the input in @a, + * and list_sort is a stable sort, so it is not necessary to distinguish + * the @a < @b and @a == @b cases. + * + * This is compatible with two styles of @cmp function: + * - The traditional style which returns <0 / =0 / >0, or + * - Returning a boolean 0/1. + * The latter offers a chance to save a few cycles in the comparison + * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c). + * + * A good way to write a multi-word comparison is:: + * + * if (a->high != b->high) + * return a->high > b->high; + * if (a->middle != b->middle) + * return a->middle > b->middle; + * return a->low > b->low; + * + * + * This mergesort is as eager as possible while always performing at least + * 2:1 balanced merges. Given two pending sublists of size 2^k, they are + * merged to a size-2^(k+1) list as soon as we have 2^k following elements. + * + * Thus, it will avoid cache thrashing as long as 3*2^k elements can + * fit into the cache. Not quite as good as a fully-eager bottom-up + * mergesort, but it does use 0.2*n fewer comparisons, so is faster in + * the common case that everything fits into L1. + * + * + * The merging is controlled by "count", the number of elements in the + * pending lists. This is beautifully simple code, but rather subtle. + * + * Each time we increment "count", we set one bit (bit k) and clear + * bits k-1 .. 0. Each time this happens (except the very first time + * for each bit, when count increments to 2^k), we merge two lists of + * size 2^k into one list of size 2^(k+1). + * + * This merge happens exactly when the count reaches an odd multiple of + * 2^k, which is when we have 2^k elements pending in smaller lists, + * so it's safe to merge away two lists of size 2^k. + * + * After this happens twice, we have created two lists of size 2^(k+1), + * which will be merged into a list of size 2^(k+2) before we create + * a third list of size 2^(k+1), so there are never more than two pending. + * + * The number of pending lists of size 2^k is determined by the + * state of bit k of "count" plus two extra pieces of information: + * + * - The state of bit k-1 (when k == 0, consider bit -1 always set), and + * - Whether the higher-order bits are zero or non-zero (i.e. + * is count >= 2^(k+1)). + * + * There are six states we distinguish. "x" represents some arbitrary + * bits, and "y" represents some arbitrary non-zero bits: + * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k + * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * (merge and loop back to state 2) + * + * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because + * bit k-1 is set while the more significant bits are non-zero) and + * merge them away in the 5->2 transition. Note in particular that just + * before the 5->2 transition, all lower-order bits are 11 (state 3), + * so there is one list of each smaller size. + * + * When we reach the end of the input, we merge all the pending + * lists, from smallest to largest. If you work through cases 2 to + * 5 above, you can see that the number of elements we merge with a list + * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to + * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1). + */ +__attribute__((nonnull(2,3))) +void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp) +{ + struct list_head *list = head->next, *pending = NULL; + size_t count = 0; /* Count of pending */ + + if (list == head->prev) /* Zero or one elements */ + return; + + /* Convert to a null-terminated singly-linked list. */ + head->prev->next = NULL; + + /* + * Data structure invariants: + * - All lists are singly linked and null-terminated; prev + * pointers are not maintained. + * - pending is a prev-linked "list of lists" of sorted + * sublists awaiting further merging. + * - Each of the sorted sublists is power-of-two in size. + * - Sublists are sorted by size and age, smallest & newest at front. + * - There are zero to two sublists of each size. + * - A pair of pending sublists are merged as soon as the number + * of following pending elements equals their size (i.e. + * each time count reaches an odd multiple of that size). + * That ensures each later final merge will be at worst 2:1. + * - Each round consists of: + * - Merging the two sublists selected by the highest bit + * which flips when count is incremented, and + * - Adding an element from the input as a size-1 sublist. + */ + do { + size_t bits; + struct list_head **tail = &pending; + + /* Find the least-significant clear bit in count */ + for (bits = count; bits & 1; bits >>= 1) + tail = &(*tail)->prev; + /* Do the indicated merge */ + if (likely(bits)) { + struct list_head *a = *tail, *b = a->prev; + + a = merge(priv, cmp, b, a); + /* Install the merged result in place of the inputs */ + a->prev = b->prev; + *tail = a; + } + + /* Move one element from input list to pending */ + list->prev = pending; + pending = list; + list = list->next; + pending->next = NULL; + count++; + } while (list); + + /* End of input; merge together all the pending lists. */ + list = pending; + pending = pending->prev; + for (;;) { + struct list_head *next = pending->prev; + + if (!next) + break; + list = merge(priv, cmp, pending, list); + pending = next; + } + /* The final merge, rebuilding prev links */ + merge_final(priv, cmp, head, pending, list); +} +EXPORT_SYMBOL(list_sort); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 6d8e521c59e1..adaad3dddf6e 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -270,11 +270,19 @@ bool perf_cpu_map__empty(const struct perf_cpu_map *map) int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) { - int i; + int low = 0, high = cpus->nr; - for (i = 0; i < cpus->nr; ++i) { - if (cpus->map[i] == cpu) - return i; + while (low < high) { + int idx = (low + high) / 2, + cpu_at_idx = cpus->map[idx]; + + if (cpu_at_idx == cpu) + return idx; + + if (cpu_at_idx > cpu) + high = idx; + else + low = idx + 1; } return -1; diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 4d0c02ba3f7d..75ee385fb078 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -289,6 +289,11 @@ struct perf_record_itrace_start { __u32 tid; }; +struct perf_record_aux_output_hw_id { + struct perf_event_header header; + __u64 hw_id; +}; + struct perf_record_thread_map_entry { __u64 pid; char comm[16]; @@ -414,6 +419,7 @@ union perf_event { struct perf_record_auxtrace_error auxtrace_error; struct perf_record_aux aux; struct perf_record_itrace_start itrace_start; + struct perf_record_aux_output_hw_id aux_output_hw_id; struct perf_record_switch context_switch; struct perf_record_thread_map thread_map; struct perf_record_cpu_map cpu_map; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index c67c83399170..ce91a582f0e4 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -40,7 +40,7 @@ static int test_stat_cpu(void) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK, }; - int err, cpu, tmp; + int err, idx; cpus = perf_cpu_map__new(NULL); __T("failed to create cpus", cpus); @@ -70,10 +70,10 @@ static int test_stat_cpu(void) perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) { struct perf_counts_values counts = { .val = 0 }; - perf_evsel__read(evsel, cpu, 0, &counts); + perf_evsel__read(evsel, idx, 0, &counts); __T("failed to read value for evsel", counts.val != 0); } } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index a184e4861627..33ae9334861a 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -22,7 +22,7 @@ static int test_stat_cpu(void) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, }; - int err, cpu, tmp; + int err, idx; cpus = perf_cpu_map__new(NULL); __T("failed to create cpus", cpus); @@ -33,10 +33,10 @@ static int test_stat_cpu(void) err = perf_evsel__open(evsel, cpus, NULL); __T("failed to open evsel", err == 0); - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) { struct perf_counts_values counts = { .val = 0 }; - perf_evsel__read(evsel, cpu, 0, &counts); + perf_evsel__read(evsel, idx, 0, &counts); __T("failed to read value for evsel", counts.val != 0); } @@ -148,6 +148,7 @@ static int test_stat_user_read(int event) __T("failed to mmap evsel", err == 0); pc = perf_evsel__mmap_base(evsel, 0, 0); + __T("failed to get mmapped address", pc); #if defined(__i386__) || defined(__x86_64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index bc821056aba9..4d6d7fc13255 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -20,6 +20,7 @@ #include <objtool/arch.h> #include <objtool/warn.h> #include <objtool/endianness.h> +#include <objtool/builtin.h> #include <arch/elf.h> static int is_x86_64(const struct elf *elf) @@ -102,12 +103,13 @@ unsigned long arch_jump_destination(struct instruction *insn) #define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) #define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) -int arch_decode_instruction(const struct elf *elf, const struct section *sec, +int arch_decode_instruction(struct objtool_file *file, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, unsigned long *immediate, struct list_head *ops_list) { + const struct elf *elf = file->elf; struct insn insn; int x86_64, ret; unsigned char op1, op2, @@ -544,6 +546,36 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, *type = INSN_RETURN; break; + case 0xc7: /* mov imm, r/m */ + if (!noinstr) + break; + + if (insn.length == 3+4+4 && !strncmp(sec->name, ".init.text", 10)) { + struct reloc *immr, *disp; + struct symbol *func; + int idx; + + immr = find_reloc_by_dest(elf, (void *)sec, offset+3); + disp = find_reloc_by_dest(elf, (void *)sec, offset+7); + + if (!immr || strcmp(immr->sym->name, "pv_ops")) + break; + + idx = (immr->addend + 8) / sizeof(void *); + + func = disp->sym; + if (disp->sym->type == STT_SECTION) + func = find_symbol_by_offset(disp->sym->sec, disp->addend); + if (!func) { + WARN("no func for pv_ops[]"); + return -1; + } + + objtool_pv_add(file, idx, func); + } + + break; + case 0xcf: /* iret */ /* * Handle sync_core(), which has an IRET to self. @@ -659,154 +691,52 @@ const char *arch_nop_insn(int len) return nops[len-1]; } -/* asm/alternative.h ? */ - -#define ALTINSTR_FLAG_INV (1 << 15) -#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) - -struct alt_instr { - s32 instr_offset; /* original instruction */ - s32 repl_offset; /* offset to replacement instruction */ - u16 cpuid; /* cpuid bit set for replacement */ - u8 instrlen; /* length of original instruction */ - u8 replacementlen; /* length of new instruction */ -} __packed; - -static int elf_add_alternative(struct elf *elf, - struct instruction *orig, struct symbol *sym, - int cpuid, u8 orig_len, u8 repl_len) -{ - const int size = sizeof(struct alt_instr); - struct alt_instr *alt; - struct section *sec; - Elf_Scn *s; - - sec = find_section_by_name(elf, ".altinstructions"); - if (!sec) { - sec = elf_create_section(elf, ".altinstructions", - SHF_ALLOC, size, 0); - - if (!sec) { - WARN_ELF("elf_create_section"); - return -1; - } - } - - s = elf_getscn(elf->elf, sec->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return -1; - } - - sec->data = elf_newdata(s); - if (!sec->data) { - WARN_ELF("elf_newdata"); - return -1; - } - - sec->data->d_size = size; - sec->data->d_align = 1; - - alt = sec->data->d_buf = malloc(size); - if (!sec->data->d_buf) { - perror("malloc"); - return -1; - } - memset(sec->data->d_buf, 0, size); - - if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size, - R_X86_64_PC32, orig->sec, orig->offset)) { - WARN("elf_create_reloc: alt_instr::instr_offset"); - return -1; - } - - if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4, - R_X86_64_PC32, sym, 0)) { - WARN("elf_create_reloc: alt_instr::repl_offset"); - return -1; - } - - alt->cpuid = bswap_if_needed(cpuid); - alt->instrlen = orig_len; - alt->replacementlen = repl_len; - - sec->sh.sh_size += size; - sec->changed = true; - - return 0; -} - -#define X86_FEATURE_RETPOLINE ( 7*32+12) +#define BYTE_RET 0xC3 -int arch_rewrite_retpolines(struct objtool_file *file) +const char *arch_ret_insn(int len) { - struct instruction *insn; - struct reloc *reloc; - struct symbol *sym; - char name[32] = ""; - - list_for_each_entry(insn, &file->retpoline_call_list, call_node) { - - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) - continue; - - if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) - continue; - - reloc = insn->reloc; - - sprintf(name, "__x86_indirect_alt_%s_%s", - insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call", - reloc->sym->name + 21); - - sym = find_symbol_by_name(file->elf, name); - if (!sym) { - sym = elf_create_undef_symbol(file->elf, name); - if (!sym) { - WARN("elf_create_undef_symbol"); - return -1; - } - } + static const char ret[5][5] = { + { BYTE_RET }, + { BYTE_RET, BYTES_NOP1 }, + { BYTE_RET, BYTES_NOP2 }, + { BYTE_RET, BYTES_NOP3 }, + { BYTE_RET, BYTES_NOP4 }, + }; - if (elf_add_alternative(file->elf, insn, sym, - ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) { - WARN("elf_add_alternative"); - return -1; - } + if (len < 1 || len > 5) { + WARN("invalid RET size: %d\n", len); + return NULL; } - return 0; + return ret[len-1]; } -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) +int arch_decode_hint_reg(u8 sp_reg, int *base) { - struct cfi_reg *cfa = &insn->cfi.cfa; - switch (sp_reg) { case ORC_REG_UNDEFINED: - cfa->base = CFI_UNDEFINED; + *base = CFI_UNDEFINED; break; case ORC_REG_SP: - cfa->base = CFI_SP; + *base = CFI_SP; break; case ORC_REG_BP: - cfa->base = CFI_BP; + *base = CFI_BP; break; case ORC_REG_SP_INDIRECT: - cfa->base = CFI_SP_INDIRECT; + *base = CFI_SP_INDIRECT; break; case ORC_REG_R10: - cfa->base = CFI_R10; + *base = CFI_R10; break; case ORC_REG_R13: - cfa->base = CFI_R13; + *base = CFI_R13; break; case ORC_REG_DI: - cfa->base = CFI_DI; + *base = CFI_DI; break; case ORC_REG_DX: - cfa->base = CFI_DX; + *base = CFI_DX; break; default: return -1; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e5947fbb9e7a..add39902166d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -5,6 +5,7 @@ #include <string.h> #include <stdlib.h> +#include <sys/mman.h> #include <arch/elf.h> #include <objtool/builtin.h> @@ -26,7 +27,11 @@ struct alternative { bool skip_orig; }; -struct cfi_init_state initial_func_cfi; +static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; + +static struct cfi_init_state initial_func_cfi; +static struct cfi_state init_cfi; +static struct cfi_state func_cfi; struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) @@ -173,6 +178,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "rewind_stack_do_exit", "kunit_try_catch_throw", "xen_start_kernel", + "cpu_bringup_and_idle", }; if (!func) @@ -265,6 +271,78 @@ static void init_insn_state(struct insn_state *state, struct section *sec) state->noinstr = sec->noinstr; } +static struct cfi_state *cfi_alloc(void) +{ + struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); + if (!cfi) { + WARN("calloc failed"); + exit(1); + } + nr_cfi++; + return cfi; +} + +static int cfi_bits; +static struct hlist_head *cfi_hash; + +static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2) +{ + return memcmp((void *)cfi1 + sizeof(cfi1->hash), + (void *)cfi2 + sizeof(cfi2->hash), + sizeof(struct cfi_state) - sizeof(struct hlist_node)); +} + +static inline u32 cfi_key(struct cfi_state *cfi) +{ + return jhash((void *)cfi + sizeof(cfi->hash), + sizeof(*cfi) - sizeof(cfi->hash), 0); +} + +static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi) +{ + struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; + struct cfi_state *obj; + + hlist_for_each_entry(obj, head, hash) { + if (!cficmp(cfi, obj)) { + nr_cfi_cache++; + return obj; + } + } + + obj = cfi_alloc(); + *obj = *cfi; + hlist_add_head(&obj->hash, head); + + return obj; +} + +static void cfi_hash_add(struct cfi_state *cfi) +{ + struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; + + hlist_add_head(&cfi->hash, head); +} + +static void *cfi_hash_alloc(unsigned long size) +{ + cfi_bits = max(10, ilog2(size)); + cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (cfi_hash == (void *)-1L) { + WARN("mmap fail cfi_hash"); + cfi_hash = NULL; + } else if (stats) { + printf("cfi_bits: %d\n", cfi_bits); + } + + return cfi_hash; +} + +static unsigned long nr_insns; +static unsigned long nr_insns_visited; + /* * Call the arch-specific instruction decoder for all the instructions and add * them to the global instruction list. @@ -275,7 +353,6 @@ static int decode_instructions(struct objtool_file *file) struct symbol *func; unsigned long offset; struct instruction *insn; - unsigned long nr_insns = 0; int ret; for_each_sec(file, sec) { @@ -292,7 +369,7 @@ static int decode_instructions(struct objtool_file *file) !strcmp(sec->name, ".entry.text")) sec->noinstr = true; - for (offset = 0; offset < sec->len; offset += insn->len) { + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { insn = malloc(sizeof(*insn)); if (!insn) { WARN("malloc failed"); @@ -301,13 +378,12 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); - init_cfi_state(&insn->cfi); insn->sec = sec; insn->offset = offset; - ret = arch_decode_instruction(file->elf, sec, offset, - sec->len - offset, + ret = arch_decode_instruction(file, sec, offset, + sec->sh.sh_size - offset, &insn->len, &insn->type, &insn->immediate, &insn->stack_ops); @@ -344,14 +420,90 @@ err: return ret; } +/* + * Read the pv_ops[] .data table to find the static initialized values. + */ +static int add_pv_ops(struct objtool_file *file, const char *symname) +{ + struct symbol *sym, *func; + unsigned long off, end; + struct reloc *rel; + int idx; + + sym = find_symbol_by_name(file->elf, symname); + if (!sym) + return 0; + + off = sym->offset; + end = off + sym->len; + for (;;) { + rel = find_reloc_by_dest_range(file->elf, sym->sec, off, end - off); + if (!rel) + break; + + func = rel->sym; + if (func->type == STT_SECTION) + func = find_symbol_by_offset(rel->sym->sec, rel->addend); + + idx = (rel->offset - sym->offset) / sizeof(unsigned long); + + objtool_pv_add(file, idx, func); + + off = rel->offset + 1; + if (off > end) + break; + } + + return 0; +} + +/* + * Allocate and initialize file->pv_ops[]. + */ +static int init_pv_ops(struct objtool_file *file) +{ + static const char *pv_ops_tables[] = { + "pv_ops", + "xen_cpu_ops", + "xen_irq_ops", + "xen_mmu_ops", + NULL, + }; + const char *pv_ops; + struct symbol *sym; + int idx, nr; + + if (!noinstr) + return 0; + + file->pv_ops = NULL; + + sym = find_symbol_by_name(file->elf, "pv_ops"); + if (!sym) + return 0; + + nr = sym->len / sizeof(unsigned long); + file->pv_ops = calloc(sizeof(struct pv_state), nr); + if (!file->pv_ops) + return -1; + + for (idx = 0; idx < nr; idx++) + INIT_LIST_HEAD(&file->pv_ops[idx].targets); + + for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) + add_pv_ops(file, pv_ops); + + return 0; +} + static struct instruction *find_last_insn(struct objtool_file *file, struct section *sec) { struct instruction *insn = NULL; unsigned int offset; - unsigned int end = (sec->len > 10) ? sec->len - 10 : 0; + unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0; - for (offset = sec->len - 1; offset >= end && !insn; offset--) + for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) insn = find_insn(file, sec, offset); return insn; @@ -389,7 +541,7 @@ static int add_dead_ends(struct objtool_file *file) insn = find_insn(file, reloc->sym->sec, reloc->addend); if (insn) insn = list_prev_entry(insn, list); - else if (reloc->addend == reloc->sym->sec->len) { + else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find unreachable insn at %s+0x%x", @@ -424,7 +576,7 @@ reachable: insn = find_insn(file, reloc->sym->sec, reloc->addend); if (insn) insn = list_prev_entry(insn, list); - else if (reloc->addend == reloc->sym->sec->len) { + else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find reachable insn at %s+0x%x", @@ -531,6 +683,52 @@ static int create_static_call_sections(struct objtool_file *file) return 0; } +static int create_retpoline_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".retpoline_sites"); + if (sec) { + WARN("file already has .retpoline_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->retpoline_call_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".retpoline_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .retpoline_sites"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .retpoline_sites"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -549,7 +747,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) + list_for_each_entry(insn, &file->mcount_loc_list, call_node) idx++; sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); @@ -557,7 +755,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return -1; idx = 0; - list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { + list_for_each_entry(insn, &file->mcount_loc_list, call_node) { loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); @@ -817,6 +1015,9 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i return NULL; if (!insn->reloc) { + if (!file) + return NULL; + insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len); if (!insn->reloc) { @@ -828,6 +1029,136 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i return insn->reloc; } +static void remove_insn_ops(struct instruction *insn) +{ + struct stack_op *op, *tmp; + + list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { + list_del(&op->list); + free(op); + } +} + +static void annotate_call_site(struct objtool_file *file, + struct instruction *insn, bool sibling) +{ + struct reloc *reloc = insn_reloc(file, insn); + struct symbol *sym = insn->call_dest; + + if (!sym) + sym = reloc->sym; + + /* + * Alternative replacement code is just template code which is + * sometimes copied to the original instruction. For now, don't + * annotate it. (In the future we might consider annotating the + * original instruction if/when it ever makes sense to do so.) + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + return; + + if (sym->static_call_tramp) { + list_add_tail(&insn->call_node, &file->static_call_list); + return; + } + + if (sym->retpoline_thunk) { + list_add_tail(&insn->call_node, &file->retpoline_call_list); + return; + } + + /* + * Many compilers cannot disable KCOV with a function attribute + * so they need a little help, NOP out any KCOV calls from noinstr + * text. + */ + if (insn->sec->noinstr && sym->kcov) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } + + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + sibling ? arch_ret_insn(insn->len) + : arch_nop_insn(insn->len)); + + insn->type = sibling ? INSN_RETURN : INSN_NOP; + return; + } + + if (mcount && sym->fentry) { + if (sibling) + WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset); + + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } + + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + arch_nop_insn(insn->len)); + + insn->type = INSN_NOP; + + list_add_tail(&insn->call_node, &file->mcount_loc_list); + return; + } +} + +static void add_call_dest(struct objtool_file *file, struct instruction *insn, + struct symbol *dest, bool sibling) +{ + insn->call_dest = dest; + if (!dest) + return; + + /* + * Whatever stack impact regular CALLs have, should be undone + * by the RETURN of the called function. + * + * Annotated intra-function calls retain the stack_ops but + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); + + annotate_call_site(file, insn, sibling); +} + +static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) +{ + /* + * Retpoline calls/jumps are really dynamic calls/jumps in disguise, + * so convert them accordingly. + */ + switch (insn->type) { + case INSN_CALL: + insn->type = INSN_CALL_DYNAMIC; + break; + case INSN_JUMP_UNCONDITIONAL: + insn->type = INSN_JUMP_DYNAMIC; + break; + case INSN_JUMP_CONDITIONAL: + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + break; + default: + return; + } + + insn->retpoline_safe = true; + + /* + * Whatever stack impact regular CALLs have, should be undone + * by the RETURN of the called function. + * + * Annotated intra-function calls retain the stack_ops but + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); + + annotate_call_site(file, insn, false); +} /* * Find the destination instructions for all jumps. */ @@ -849,28 +1180,12 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (arch_is_retpoline(reloc->sym)) { - /* - * Retpoline jumps are really dynamic jumps in - * disguise, so convert them accordingly. - */ - if (insn->type == INSN_JUMP_UNCONDITIONAL) - insn->type = INSN_JUMP_DYNAMIC; - else - insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; - - list_add_tail(&insn->call_node, - &file->retpoline_call_list); - - insn->retpoline_safe = true; + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ - insn->call_dest = reloc->sym; - if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->call_node, - &file->static_call_list); - } + add_call_dest(file, insn, reloc->sym, true); continue; } else if (reloc->sym->sec->idx) { dest_sec = reloc->sym->sec; @@ -926,13 +1241,8 @@ static int add_jump_destinations(struct objtool_file *file) } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { - /* internal sibling call (without reloc) */ - insn->call_dest = insn->jump_dest->func; - if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->call_node, - &file->static_call_list); - } + add_call_dest(file, insn, insn->jump_dest->func, true); } } } @@ -940,16 +1250,6 @@ static int add_jump_destinations(struct objtool_file *file) return 0; } -static void remove_insn_ops(struct instruction *insn) -{ - struct stack_op *op, *tmp; - - list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { - list_del(&op->list); - free(op); - } -} - static struct symbol *find_call_destination(struct section *sec, unsigned long offset) { struct symbol *call_dest; @@ -968,6 +1268,7 @@ static int add_call_destinations(struct objtool_file *file) { struct instruction *insn; unsigned long dest_off; + struct symbol *dest; struct reloc *reloc; for_each_insn(file, insn) { @@ -977,7 +1278,9 @@ static int add_call_destinations(struct objtool_file *file) reloc = insn_reloc(file, insn); if (!reloc) { dest_off = arch_jump_destination(insn); - insn->call_dest = find_call_destination(insn->sec, dest_off); + dest = find_call_destination(insn->sec, dest_off); + + add_call_dest(file, insn, dest, false); if (insn->ignore) continue; @@ -995,9 +1298,8 @@ static int add_call_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_off = arch_dest_reloc_offset(reloc->addend); - insn->call_dest = find_call_destination(reloc->sym->sec, - dest_off); - if (!insn->call_dest) { + dest = find_call_destination(reloc->sym->sec, dest_off); + if (!dest) { WARN_FUNC("can't find call dest symbol at %s+0x%lx", insn->sec, insn->offset, reloc->sym->sec->name, @@ -1005,70 +1307,13 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - } else if (arch_is_retpoline(reloc->sym)) { - /* - * Retpoline calls are really dynamic calls in - * disguise, so convert them accordingly. - */ - insn->type = INSN_CALL_DYNAMIC; - insn->retpoline_safe = true; + add_call_dest(file, insn, dest, false); - list_add_tail(&insn->call_node, - &file->retpoline_call_list); - - remove_insn_ops(insn); - continue; + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); } else - insn->call_dest = reloc->sym; - - if (insn->call_dest && insn->call_dest->static_call_tramp) { - list_add_tail(&insn->call_node, - &file->static_call_list); - } - - /* - * Many compilers cannot disable KCOV with a function attribute - * so they need a little help, NOP out any KCOV calls from noinstr - * text. - */ - if (insn->sec->noinstr && - !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { - if (reloc) { - reloc->type = R_NONE; - elf_write_reloc(file->elf, reloc); - } - - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); - insn->type = INSN_NOP; - } - - if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) { - if (reloc) { - reloc->type = R_NONE; - elf_write_reloc(file->elf, reloc); - } - - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); - - insn->type = INSN_NOP; - - list_add_tail(&insn->mcount_loc_node, - &file->mcount_loc_list); - } - - /* - * Whatever stack impact regular CALLs have, should be undone - * by the RETURN of the called function. - * - * Annotated intra-function calls retain the stack_ops but - * are converted to JUMP, see read_intra_function_calls(). - */ - remove_insn_ops(insn); + add_call_dest(file, insn, reloc->sym, false); } return 0; @@ -1136,7 +1381,6 @@ static int handle_group_alt(struct objtool_file *file, memset(nop, 0, sizeof(*nop)); INIT_LIST_HEAD(&nop->alts); INIT_LIST_HEAD(&nop->stack_ops); - init_cfi_state(&nop->cfi); nop->sec = special_alt->new_sec; nop->offset = special_alt->new_off + special_alt->new_len; @@ -1545,10 +1789,11 @@ static void set_func_state(struct cfi_state *state) static int read_unwind_hints(struct objtool_file *file) { + struct cfi_state cfi = init_cfi; struct section *sec, *relocsec; - struct reloc *reloc; struct unwind_hint *hint; struct instruction *insn; + struct reloc *reloc; int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); @@ -1561,14 +1806,14 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - if (sec->len % sizeof(struct unwind_hint)) { + if (sec->sh.sh_size % sizeof(struct unwind_hint)) { WARN("struct unwind_hint size mismatch"); return -1; } file->hints = true; - for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { + for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) { hint = (struct unwind_hint *)sec->data->d_buf + i; reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint)); @@ -1586,19 +1831,24 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; if (hint->type == UNWIND_HINT_TYPE_FUNC) { - set_func_state(&insn->cfi); + insn->cfi = &func_cfi; continue; } - if (arch_decode_hint_reg(insn, hint->sp_reg)) { + if (insn->cfi) + cfi = *(insn->cfi); + + if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; } - insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset); - insn->cfi.type = hint->type; - insn->cfi.end = hint->end; + cfi.cfa.offset = bswap_if_needed(hint->sp_offset); + cfi.type = hint->type; + cfi.end = hint->end; + + insn->cfi = cfi_hash_find_or_add(&cfi); } return 0; @@ -1737,17 +1987,28 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } -static int read_static_call_tramps(struct objtool_file *file) +static int classify_symbols(struct objtool_file *file) { struct section *sec; struct symbol *func; for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->bind == STB_GLOBAL && - !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + if (func->bind != STB_GLOBAL) + continue; + + if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, strlen(STATIC_CALL_TRAMP_PREFIX_STR))) func->static_call_tramp = true; + + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + + if (!strncmp(func->name, "__sanitizer_cov_", 16)) + func->kcov = true; } } @@ -1780,17 +2041,16 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } -__weak int arch_rewrite_retpolines(struct objtool_file *file) -{ - return 0; -} - static int decode_sections(struct objtool_file *file) { int ret; mark_rodata(file); + ret = init_pv_ops(file); + if (ret) + return ret; + ret = decode_instructions(file); if (ret) return ret; @@ -1809,7 +2069,7 @@ static int decode_sections(struct objtool_file *file) /* * Must be before add_{jump_call}_destination. */ - ret = read_static_call_tramps(file); + ret = classify_symbols(file); if (ret) return ret; @@ -1853,23 +2113,14 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be after add_special_section_alts(), since this will emit - * alternatives. Must be after add_{jump,call}_destination(), since - * those create the call insn lists. - */ - ret = arch_rewrite_retpolines(file); - if (ret) - return ret; - return 0; } static bool is_fentry_call(struct instruction *insn) { - if (insn->type == INSN_CALL && insn->call_dest && - insn->call_dest->type == STT_NOTYPE && - !strcmp(insn->call_dest->name, "__fentry__")) + if (insn->type == INSN_CALL && + insn->call_dest && + insn->call_dest->fentry) return true; return false; @@ -2452,13 +2703,18 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn if (!insn->alt_group) return 0; + if (!insn->cfi) { + WARN("CFI missing"); + return -1; + } + alt_cfi = insn->alt_group->cfi; group_off = insn->offset - insn->alt_group->first_insn->offset; if (!alt_cfi[group_off]) { - alt_cfi[group_off] = &insn->cfi; + alt_cfi[group_off] = insn->cfi; } else { - if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { + if (cficmp(alt_cfi[group_off], insn->cfi)) { WARN_FUNC("stack layout conflict in alternatives", insn->sec, insn->offset); return -1; @@ -2509,9 +2765,14 @@ static int handle_insn_ops(struct instruction *insn, static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) { - struct cfi_state *cfi1 = &insn->cfi; + struct cfi_state *cfi1 = insn->cfi; int i; + if (!cfi1) { + WARN("CFI missing"); + return false; + } + if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", @@ -2562,20 +2823,64 @@ static inline bool func_uaccess_safe(struct symbol *func) static inline const char *call_dest_name(struct instruction *insn) { + static char pvname[16]; + struct reloc *rel; + int idx; + if (insn->call_dest) return insn->call_dest->name; + rel = insn_reloc(NULL, insn); + if (rel && !strcmp(rel->sym->name, "pv_ops")) { + idx = (rel->addend / sizeof(void *)); + snprintf(pvname, sizeof(pvname), "pv_ops[%d]", idx); + return pvname; + } + return "{dynamic}"; } -static inline bool noinstr_call_dest(struct symbol *func) +static bool pv_call_dest(struct objtool_file *file, struct instruction *insn) +{ + struct symbol *target; + struct reloc *rel; + int idx; + + rel = insn_reloc(file, insn); + if (!rel || strcmp(rel->sym->name, "pv_ops")) + return false; + + idx = (arch_dest_reloc_offset(rel->addend) / sizeof(void *)); + + if (file->pv_ops[idx].clean) + return true; + + file->pv_ops[idx].clean = true; + + list_for_each_entry(target, &file->pv_ops[idx].targets, pv_target) { + if (!target->sec->noinstr) { + WARN("pv_ops[%d]: %s", idx, target->name); + file->pv_ops[idx].clean = false; + } + } + + return file->pv_ops[idx].clean; +} + +static inline bool noinstr_call_dest(struct objtool_file *file, + struct instruction *insn, + struct symbol *func) { /* * We can't deal with indirect function calls at present; * assume they're instrumented. */ - if (!func) + if (!func) { + if (file->pv_ops) + return pv_call_dest(file, insn); + return false; + } /* * If the symbol is from a noinstr section; we good. @@ -2594,10 +2899,12 @@ static inline bool noinstr_call_dest(struct symbol *func) return false; } -static int validate_call(struct instruction *insn, struct insn_state *state) +static int validate_call(struct objtool_file *file, + struct instruction *insn, + struct insn_state *state) { if (state->noinstr && state->instr <= 0 && - !noinstr_call_dest(insn->call_dest)) { + !noinstr_call_dest(file, insn, insn->call_dest)) { WARN_FUNC("call to %s() leaves .noinstr.text section", insn->sec, insn->offset, call_dest_name(insn)); return 1; @@ -2618,7 +2925,9 @@ static int validate_call(struct instruction *insn, struct insn_state *state) return 0; } -static int validate_sibling_call(struct instruction *insn, struct insn_state *state) +static int validate_sibling_call(struct objtool_file *file, + struct instruction *insn, + struct insn_state *state) { if (has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", @@ -2626,7 +2935,7 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st return 1; } - return validate_call(insn, state); + return validate_call(file, insn, state); } static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state) @@ -2696,7 +3005,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state) { struct alternative *alt; - struct instruction *next_insn; + struct instruction *next_insn, *prev_insn = NULL; struct section *sec; u8 visited; int ret; @@ -2725,15 +3034,25 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (insn->visited & visited) return 0; + } else { + nr_insns_visited++; } if (state.noinstr) state.instr += insn->instr; - if (insn->hint) - state.cfi = insn->cfi; - else - insn->cfi = state.cfi; + if (insn->hint) { + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ + + if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) { + insn->cfi = prev_insn->cfi; + nr_cfi_reused++; + } else { + insn->cfi = cfi_hash_find_or_add(&state.cfi); + } + } insn->visited |= visited; @@ -2769,7 +3088,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_CALL: case INSN_CALL_DYNAMIC: - ret = validate_call(insn, &state); + ret = validate_call(file, insn, &state); if (ret) return ret; @@ -2788,7 +3107,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: if (is_sibling_call(insn)) { - ret = validate_sibling_call(insn, &state); + ret = validate_sibling_call(file, insn, &state); if (ret) return ret; @@ -2810,7 +3129,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_DYNAMIC: case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { - ret = validate_sibling_call(insn, &state); + ret = validate_sibling_call(file, insn, &state); if (ret) return ret; } @@ -2883,6 +3202,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } + prev_insn = insn; insn = next_insn; } @@ -2909,7 +3229,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) } while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { - if (insn->hint && !insn->visited) { + if (insn->hint && !insn->visited && !insn->ignore) { ret = validate_branch(file, insn->func, insn, state); if (ret && backtrace) BT_FUNC("<=== (hint)", insn); @@ -3138,10 +3458,20 @@ int check(struct objtool_file *file) int ret, warnings = 0; arch_initial_func_cfi_state(&initial_func_cfi); + init_cfi_state(&init_cfi); + init_cfi_state(&func_cfi); + set_func_state(&func_cfi); + + if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) + goto out; + + cfi_hash_add(&init_cfi); + cfi_hash_add(&func_cfi); ret = decode_sections(file); if (ret < 0) goto out; + warnings += ret; if (list_empty(&file->insn_list)) @@ -3185,6 +3515,13 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (retpoline) { + ret = create_retpoline_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (mcount) { ret = create_mcount_loc_sections(file); if (ret < 0) @@ -3192,6 +3529,13 @@ int check(struct objtool_file *file) warnings += ret; } + if (stats) { + printf("nr_insns_visited: %ld\n", nr_insns_visited); + printf("nr_cfi: %ld\n", nr_cfi); + printf("nr_cfi_reused: %ld\n", nr_cfi_reused); + printf("nr_cfi_cache: %ld\n", nr_cfi_cache); + } + out: /* * For now, don't fail the kernel build on fatal warnings. These diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 8676c7598728..81a4c543ff7e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -286,10 +286,9 @@ static int read_sections(struct elf *elf) return -1; } } - sec->len = sec->sh.sh_size; if (sec->sh.sh_flags & SHF_EXECINSTR) - elf->text_size += sec->len; + elf->text_size += sec->sh.sh_size; list_add_tail(&sec->list, &elf->sections); elf_hash_add(section, &sec->hash, sec->idx); @@ -509,6 +508,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, list_add_tail(&reloc->list, &sec->reloc->reloc_list); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); + sec->reloc->sh.sh_size += sec->reloc->sh.sh_entsize; sec->reloc->changed = true; return 0; @@ -734,97 +734,13 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) data->d_size = strlen(str) + 1; data->d_align = 1; - len = strtab->len; - strtab->len += data->d_size; + len = strtab->sh.sh_size; + strtab->sh.sh_size += data->d_size; strtab->changed = true; return len; } -struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) -{ - struct section *symtab, *symtab_shndx; - struct symbol *sym; - Elf_Data *data; - Elf_Scn *s; - - sym = malloc(sizeof(*sym)); - if (!sym) { - perror("malloc"); - return NULL; - } - memset(sym, 0, sizeof(*sym)); - - sym->name = strdup(name); - - sym->sym.st_name = elf_add_string(elf, NULL, sym->name); - if (sym->sym.st_name == -1) - return NULL; - - sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); - // st_other 0 - // st_shndx 0 - // st_value 0 - // st_size 0 - - symtab = find_section_by_name(elf, ".symtab"); - if (!symtab) { - WARN("can't find .symtab"); - return NULL; - } - - s = elf_getscn(elf->elf, symtab->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = &sym->sym; - data->d_size = sizeof(sym->sym); - data->d_align = 1; - data->d_type = ELF_T_SYM; - - sym->idx = symtab->len / sizeof(sym->sym); - - symtab->len += data->d_size; - symtab->changed = true; - - symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); - if (symtab_shndx) { - s = elf_getscn(elf->elf, symtab_shndx->idx); - if (!s) { - WARN_ELF("elf_getscn"); - return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = &sym->sym.st_size; /* conveniently 0 */ - data->d_size = sizeof(Elf32_Word); - data->d_align = 4; - data->d_type = ELF_T_WORD; - - symtab_shndx->len += 4; - symtab_shndx->changed = true; - } - - sym->sec = find_section_by_index(elf, 0); - - elf_add_symbol(elf, sym); - - return sym; -} - struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { @@ -855,7 +771,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, } sec->idx = elf_ndxscn(s); - sec->len = size; sec->changed = true; sec->data = elf_newdata(s); @@ -979,63 +894,63 @@ static struct section *elf_create_reloc_section(struct elf *elf, } } -static int elf_rebuild_rel_reloc_section(struct section *sec, int nr) +static int elf_rebuild_rel_reloc_section(struct section *sec) { struct reloc *reloc; - int idx = 0, size; + int idx = 0; void *buf; /* Allocate a buffer for relocations */ - size = nr * sizeof(GElf_Rel); - buf = malloc(size); + buf = malloc(sec->sh.sh_size); if (!buf) { perror("malloc"); return -1; } sec->data->d_buf = buf; - sec->data->d_size = size; + sec->data->d_size = sec->sh.sh_size; sec->data->d_type = ELF_T_REL; - sec->sh.sh_size = size; - idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { reloc->rel.r_offset = reloc->offset; reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); - gelf_update_rel(sec->data, idx, &reloc->rel); + if (!gelf_update_rel(sec->data, idx, &reloc->rel)) { + WARN_ELF("gelf_update_rel"); + return -1; + } idx++; } return 0; } -static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) +static int elf_rebuild_rela_reloc_section(struct section *sec) { struct reloc *reloc; - int idx = 0, size; + int idx = 0; void *buf; /* Allocate a buffer for relocations with addends */ - size = nr * sizeof(GElf_Rela); - buf = malloc(size); + buf = malloc(sec->sh.sh_size); if (!buf) { perror("malloc"); return -1; } sec->data->d_buf = buf; - sec->data->d_size = size; + sec->data->d_size = sec->sh.sh_size; sec->data->d_type = ELF_T_RELA; - sec->sh.sh_size = size; - idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { reloc->rela.r_offset = reloc->offset; reloc->rela.r_addend = reloc->addend; reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); - gelf_update_rela(sec->data, idx, &reloc->rela); + if (!gelf_update_rela(sec->data, idx, &reloc->rela)) { + WARN_ELF("gelf_update_rela"); + return -1; + } idx++; } @@ -1044,16 +959,9 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { - struct reloc *reloc; - int nr; - - nr = 0; - list_for_each_entry(reloc, &sec->reloc_list, list) - nr++; - switch (sec->sh.sh_type) { - case SHT_REL: return elf_rebuild_rel_reloc_section(sec, nr); - case SHT_RELA: return elf_rebuild_rela_reloc_section(sec, nr); + case SHT_REL: return elf_rebuild_rel_reloc_section(sec); + case SHT_RELA: return elf_rebuild_rela_reloc_section(sec); default: return -1; } } @@ -1113,12 +1021,6 @@ int elf_write(struct elf *elf) /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { - if (sec->base && - elf_rebuild_reloc_section(elf, sec)) { - WARN("elf_rebuild_reloc_section"); - return -1; - } - s = elf_getscn(elf->elf, sec->idx); if (!s) { WARN_ELF("elf_getscn"); @@ -1129,6 +1031,12 @@ int elf_write(struct elf *elf) return -1; } + if (sec->base && + elf_rebuild_reloc_section(elf, sec)) { + WARN("elf_rebuild_reloc_section"); + return -1; + } + sec->changed = false; elf->changed = true; } diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 062bb6e9b865..589ff58426ab 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -69,7 +69,7 @@ struct instruction; void arch_initial_func_cfi_state(struct cfi_init_state *state); -int arch_decode_instruction(const struct elf *elf, const struct section *sec, +int arch_decode_instruction(struct objtool_file *file, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, unsigned long *immediate, @@ -82,8 +82,9 @@ unsigned long arch_jump_destination(struct instruction *insn); unsigned long arch_dest_reloc_offset(int addend); const char *arch_nop_insn(int len); +const char *arch_ret_insn(int len); -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +int arch_decode_hint_reg(u8 sp_reg, int *base); bool arch_is_retpoline(struct symbol *sym); diff --git a/tools/objtool/include/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h index fd5cb0bed9bf..f11d1ac1dadf 100644 --- a/tools/objtool/include/objtool/cfi.h +++ b/tools/objtool/include/objtool/cfi.h @@ -7,6 +7,7 @@ #define _OBJTOOL_CFI_H #include <arch/cfi_regs.h> +#include <linux/list.h> #define CFI_UNDEFINED -1 #define CFI_CFA -2 @@ -24,6 +25,7 @@ struct cfi_init_state { }; struct cfi_state { + struct hlist_node hash; /* must be first, cficmp() */ struct cfi_reg regs[CFI_NUM_REGS]; struct cfi_reg vals[CFI_NUM_REGS]; struct cfi_reg cfa; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 56d50bc50c10..6cfff078897f 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -40,7 +40,6 @@ struct instruction { struct list_head list; struct hlist_node hash; struct list_head call_node; - struct list_head mcount_loc_node; struct section *sec; unsigned long offset; unsigned int len; @@ -60,7 +59,7 @@ struct instruction { struct list_head alts; struct symbol *func; struct list_head stack_ops; - struct cfi_state cfi; + struct cfi_state *cfi; }; static inline bool is_static_jump(struct instruction *insn) diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e34395047530..cdc739fa9a6f 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -38,7 +38,6 @@ struct section { Elf_Data *data; char *name; int idx; - unsigned int len; bool changed, text, rodata, noinstr; }; @@ -55,8 +54,12 @@ struct symbol { unsigned long offset; unsigned int len; struct symbol *pfunc, *cfunc, *alias; - bool uaccess_safe; - bool static_call_tramp; + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; + u8 fentry : 1; + u8 kcov : 1; + struct list_head pv_target; }; struct reloc { @@ -141,7 +144,6 @@ int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); int elf_write_reloc(struct elf *elf, struct reloc *reloc); -struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); int elf_write(struct elf *elf); void elf_close(struct elf *elf); diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 24fa83634de4..f99fbc6078d5 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -14,6 +14,11 @@ #define __weak __attribute__((weak)) +struct pv_state { + bool clean; + struct list_head targets; +}; + struct objtool_file { struct elf *elf; struct list_head insn_list; @@ -25,10 +30,14 @@ struct objtool_file { unsigned long jl_short, jl_long; unsigned long jl_nop_short, jl_nop_long; + + struct pv_state *pv_ops; }; struct objtool_file *objtool_open_read(const char *_objname); +void objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func); + int check(struct objtool_file *file); int orc_dump(const char *objname); int orc_create(struct objtool_file *file); diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index e21db8bce493..c90c7084e45a 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -135,6 +135,28 @@ struct objtool_file *objtool_open_read(const char *_objname) return &file; } +void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) +{ + if (!noinstr) + return; + + if (!f->pv_ops) { + WARN("paravirt confusion"); + return; + } + + /* + * These functions will be patched into native code, + * see paravirt_patch(). + */ + if (!strcmp(func->name, "_paravirt_nop") || + !strcmp(func->name, "_paravirt_ident_64")) + return; + + list_add(&func->pv_target, &f->pv_ops[idx].targets); + f->pv_ops[idx].clean = false; +} + static void cmd_usage(void) { unsigned int i, longest = 0; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index dc9b7dd314b0..dd3c64af9db2 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -13,13 +13,19 @@ #include <objtool/warn.h> #include <objtool/endianness.h> -static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) +static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, + struct instruction *insn) { - struct instruction *insn = container_of(cfi, struct instruction, cfi); struct cfi_reg *bp = &cfi->regs[CFI_BP]; memset(orc, 0, sizeof(*orc)); + if (!cfi) { + orc->end = 0; + orc->sp_reg = ORC_REG_UNDEFINED; + return 0; + } + orc->end = cfi->end; if (cfi->cfa.base == CFI_UNDEFINED) { @@ -162,7 +168,7 @@ int orc_create(struct objtool_file *file) int i; if (!alt_group) { - if (init_orc_entry(&orc, &insn->cfi)) + if (init_orc_entry(&orc, insn->cfi, insn)) return -1; if (!memcmp(&prev_orc, &orc, sizeof(orc))) continue; @@ -186,7 +192,8 @@ int orc_create(struct objtool_file *file) struct cfi_state *cfi = alt_group->cfi[i]; if (!cfi) continue; - if (init_orc_entry(&orc, cfi)) + /* errors are reported on the original insn */ + if (init_orc_entry(&orc, cfi, insn)) return -1; if (!memcmp(&prev_orc, &orc, sizeof(orc))) continue; @@ -204,7 +211,7 @@ int orc_create(struct objtool_file *file) /* Add a section terminator */ if (!empty) { - orc_list_add(&orc_list, &null, sec, sec->len); + orc_list_add(&orc_list, &null, sec, sec->sh.sh_size); nr++; } } diff --git a/tools/objtool/special.c b/tools/objtool/special.c index f1428e32a505..e2223dd91c37 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -58,22 +58,11 @@ void __weak arch_handle_alternative(unsigned short feature, struct special_alt * { } -static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off) +static void reloc_to_sec_off(struct reloc *reloc, struct section **sec, + unsigned long *off) { - switch (reloc->sym->type) { - case STT_FUNC: - *sec = reloc->sym->sec; - *off = reloc->sym->offset + reloc->addend; - return true; - - case STT_SECTION: - *sec = reloc->sym->sec; - *off = reloc->addend; - return true; - - default: - return false; - } + *sec = reloc->sym->sec; + *off = reloc->sym->offset + reloc->addend; } static int get_alt_entry(struct elf *elf, struct special_entry *entry, @@ -109,13 +98,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); return -1; } - if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) { - WARN_FUNC("don't know how to handle reloc symbol type %d: %s", - sec, offset + entry->orig, - orig_reloc->sym->type, - orig_reloc->sym->name); - return -1; - } + + reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); @@ -125,21 +109,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } - /* - * Skip retpoline .altinstr_replacement... we already rewrite the - * instructions for retpolines anyway, see arch_is_retpoline() - * usage in add_{call,jump}_destinations(). - */ - if (arch_is_retpoline(new_reloc->sym)) - return 1; - - if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) { - WARN_FUNC("don't know how to handle reloc symbol type %d: %s", - sec, offset + entry->new, - new_reloc->sym->type, - new_reloc->sym->name); - return -1; - } + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ if (alt->new_off >= 0x7ffffff0) @@ -181,13 +151,13 @@ int special_get_alts(struct elf *elf, struct list_head *alts) if (!sec) continue; - if (sec->len % entry->size != 0) { + if (sec->sh.sh_size % entry->size != 0) { WARN("%s size not a multiple of %d", sec->name, entry->size); return -1; } - nr_entries = sec->len / entry->size; + nr_entries = sec->sh.sh_size / entry->size; for (idx = 0; idx < nr_entries; idx++) { alt = malloc(sizeof(*alt)); diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 8e0163b7ef01..20b8ab984d5f 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -37,6 +37,7 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents feature/ +libbpf/ fixdep libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 2d586fe5e4c5..c52755481e2f 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -20,6 +20,7 @@ L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) The default is all events i.e. the same as --itrace=ibxwpe, @@ -61,5 +62,6 @@ debug messages will or will not be logged. Each flag must be preceded by either '+' or '-'. The flags are: a all perf events + o output to stdout If supported, the 'q' option may be repeated to increase the effect. diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 91108fe3ad5f..0570a1ccd344 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -45,6 +45,13 @@ OPTIONS tasks slept. sched_switch contains a callchain where a task slept and sched_stat contains a timeslice how long a task slept. +-k:: +--vmlinux=<file>:: + vmlinux pathname + +--ignore-vmlinux:: + Ignore vmlinux files. + --kallsyms=<file>:: kallsyms pathname diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index db465fa7ee91..cbb920f5d056 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -157,6 +157,17 @@ of instructions and number of cycles since the last update, and thus represent the average IPC since the last IPC for that event type. Note IPC for "branches" events is calculated separately from IPC for "instructions" events. +Even with the 'cyc' config term, it is possible to produce IPC information for +every change of timestamp, but at the expense of accuracy. That is selected by +specifying the itrace 'A' option. Due to the granularity of timestamps, the +actual number of cycles increases even though the cycles reported does not. +The number of instructions is known, but if IPC is reported, cycles can be too +low and so IPC is too high. Note that inaccuracy decreases as the period of +sampling increases i.e. if the number of cycles is too low by a small amount, +that becomes less significant if the number of cycles is large. It may also be +useful to use the 'A' option in conjunction with dlfilter-show-cycles.so to +provide higher granularity cycle information. + Also note that the IPC instruction count may or may not include the current instruction. If the cycle count is associated with an asynchronous branch (e.g. page fault or interrupt), then the instruction count does not include the @@ -873,6 +884,7 @@ The letters are: L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) "Instructions" events look like they were recorded by "perf record -e @@ -941,6 +953,7 @@ by flags which affect what debug messages will or will not be logged. Each flag must be preceded by either '+' or '-'. The flags support by Intel PT are: -a Suppress logging of perf events +a Log all perf events + +o Output to stdout instead of "intel_pt.log" By default, logged perf events are filtered by any specified time ranges, but flag +a overrides that. @@ -1072,6 +1085,21 @@ The Z option is equivalent to having recorded a trace without TSC decoding a trace of a virtual machine. +dlfilter-show-cycles.so +~~~~~~~~~~~~~~~~~~~~~~~ + +Cycles can be displayed using dlfilter-show-cycles.so in which case the itrace A +option can be useful to provide higher granularity cycle information: + + perf script --itrace=A --call-trace --dlfilter dlfilter-show-cycles.so + +To see a list of dlfilters: + + perf script -v --list-dlfilters + +See also linkperf:perf-dlfilters[1] + + dump option ~~~~~~~~~~~ @@ -1144,7 +1172,12 @@ Recording is selected by using the aux-output config term e.g. perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname -Note that currently, software only supports redirecting at most one PEBS event. +Originally, software only supported redirecting at most one PEBS event because it +was not able to differentiate one event from another. To overcome that, more recent +kernels and perf tools add support for the PERF_RECORD_AUX_OUTPUT_HW_ID side-band event. +To check for the presence of that event in a PEBS-via-PT trace: + + perf script -D --no-itrace | grep PERF_RECORD_AUX_OUTPUT_HW_ID To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 85b8ac695c87..f378ac59353d 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -8,22 +8,25 @@ perf-kmem - Tool to trace/measure kernel memory properties SYNOPSIS -------- [verse] -'perf kmem' {record|stat} [<options>] +'perf kmem' [<options>] {record|stat} DESCRIPTION ----------- There are two variants of perf kmem: - 'perf kmem record <command>' to record the kmem events - of an arbitrary workload. + 'perf kmem [<options>] record [<perf-record-options>] <command>' to + record the kmem events of an arbitrary workload. Additional 'perf + record' options may be specified after record, such as '-o' to + change the output file name. - 'perf kmem stat' to report kernel memory statistics. + 'perf kmem [<options>] stat' to report kernel memory statistics. OPTIONS ------- -i <file>:: --input=<file>:: - Select the input file (default: perf.data unless stdin is a fifo) + For stat, select the input file (default: perf.data unless stdin is a + fifo) -f:: --force:: diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 4c7db1da8fcc..4dc8d0af19df 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -39,6 +39,10 @@ any extra expressions computed by perf stat. --deprecated:: Print deprecated events. By default the deprecated events are hidden. +--cputype:: +Print events applying cpu with this type for hybrid platform +(e.g. --cputype core or --cputype atom) + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index f1079ee7f2ec..2d7df8703cf2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -596,6 +596,22 @@ options. 'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj in config file is set to true. +--synth=TYPE:: +Collect and synthesize given type of events (comma separated). Note that +this option controls the synthesis from the /proc filesystem which represent +task status for pre-existing threads. + +Kernel (and some other) events are recorded regardless of the +choice in this option. For example, --synth=no would have MMAP events for +kernel and modules. + +Available types are: + 'task' - synthesize FORK and COMM events for each task + 'mmap' - synthesize MMAP events for each process (implies 'task') + 'cgroup' - synthesize CGROUP events for each cgroup + 'all' - synthesize all events (default) + 'no' - do not synthesize any of the above events + --tail-synthesize:: Instead of collecting non-sample events (for example, fork, comm, mmap) at the beginning of record, collect them during finalizing an output file. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c80515243560..b0070718784d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -130,7 +130,7 @@ OPTIONS comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, - metric, misc, srccode, ipc, data_page_size, code_page_size. + metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index e6ff8c898ada..f56d0e0fbff6 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -346,7 +346,7 @@ to special needs. HEADER_BPF_PROG_INFO = 25, -struct bpf_prog_info_linear, which contains detailed information about +struct perf_bpil, which contains detailed information about a BPF program, including type, id, tag, jited/xlated instructions, etc. HEADER_BPF_BTF = 26, diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f05c4d48fd7e..f5d72f936a6b 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -17,7 +17,11 @@ tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/find_bit.c tools/lib/bitmap.c +tools/lib/list_sort.c tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c scripts/bpf_doc.py +tools/bpf/bpftool +kernel/bpf/disasm.c +kernel/bpf/disasm.h diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 446180401e26..07e65a061fd3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -143,7 +143,7 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif -OPENCSDLIBS := -lopencsd_c_api -lopencsd +OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++ ifdef CSLIBS LIBOPENCSD_LDFLAGS := -L$(CSLIBS) endif @@ -285,7 +285,7 @@ CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra CORE_CFLAGS += -std=gnu99 -CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti +CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall CXXFLAGS += -fno-omit-frame-pointer CXXFLAGS += -ggdb3 @@ -1093,11 +1093,32 @@ ifdef LIBTRACEEVENT_DYNAMIC $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) EXTLIBS += -ltraceevent + LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent) + LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION))) + LIBTRACEEVENT_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEEVENT_VERSION))) + LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION))) + LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3)) + CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP) else dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); endif endif +ifdef LIBTRACEFS_DYNAMIC + $(call feature_check,libtracefs) + ifeq ($(feature-libtracefs), 1) + EXTLIBS += -ltracefs + LIBTRACEFS_VERSION := $(shell $(PKG_CONFIG) --modversion libtracefs) + LIBTRACEFS_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEFS_VERSION))) + LIBTRACEFS_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEFS_VERSION))) + LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION))) + LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3)) + CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP) + else + dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev); + endif +endif + # Among the variables below, these: # perfexecdir # perf_include_dir diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e04313c4d840..0777748b6da8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -130,6 +130,8 @@ include ../scripts/utilities.mak # # Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking # +# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -241,7 +243,7 @@ else # force_fixdep LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ -BPF_DIR = $(srctree)/tools/lib/bpf/ +LIBBPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -293,7 +295,6 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) - BPF_PATH=$(OUTPUT) SUBCMD_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) ifneq ($(subdir),) @@ -305,7 +306,6 @@ else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ API_PATH=$(LIB_DIR) - BPF_PATH=$(BPF_DIR) SUBCMD_PATH=$(SUBCMD_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -324,7 +324,14 @@ LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DY LIBAPI = $(API_PATH)libapi.a export LIBAPI -LIBBPF = $(BPF_PATH)libbpf.a +ifneq ($(OUTPUT),) + LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf +else + LIBBPF_OUTPUT = $(CURDIR)/libbpf +endif +LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include +LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a @@ -360,7 +367,7 @@ ifndef NO_JVMTI PROGRAMS += $(OUTPUT)$(LIBJVMTI) endif -DLFILTERS := dlfilter-test-api-v0.so +DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS)) # what 'all' will build and 'install' will install, in perfexecdir @@ -787,6 +794,8 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c include/perf/perf_dlfilter.h $(Q)$(MKDIR) -p $(OUTPUT)dlfilters $(QUIET_CC)$(CC) -c -Iinclude $(EXTRA_CFLAGS) -o $@ -fpic $< +.SECONDARY: $(DLFILTERS:.so=.o) + $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o $(QUIET_LINK)$(CC) $(EXTRA_CFLAGS) -shared -o $@ $< @@ -802,7 +811,7 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a @@ -827,12 +836,14 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBBPF): FORCE - $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) + $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $@ install_headers $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) - $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(LIBPERF): FORCE $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a @@ -1032,16 +1043,15 @@ SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h +$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): + $(Q)$(MKDIR) -p $@ + ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool -LIBBPF_SRC := $(abspath ../lib/bpf) -BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/.. - -$(SKEL_TMP_OUT): - $(Q)$(MKDIR) -p $@ +BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) $(BPFTOOL): | $(SKEL_TMP_OUT) - CFLAGS= $(MAKE) -C ../bpf/bpftool \ + $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 2234fbd0a912..d3a18f9c85f6 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -3,7 +3,7 @@ #include "../../../util/cpumap.h" #include "../../../util/pmu.h" -struct pmu_events_map *pmu_events_map__find(void) +const struct pmu_events_map *pmu_events_map__find(void) { struct perf_pmu *pmu = NULL; diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 04e5dc07e93f..93339d17acc4 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -77,6 +77,8 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_PMC4] = "pmc4", [PERF_REG_POWERPC_PMC5] = "pmc5", [PERF_REG_POWERPC_PMC6] = "pmc6", + [PERF_REG_POWERPC_SDAR] = "sdar", + [PERF_REG_POWERPC_SIAR] = "siar", }; static inline const char *__perf_reg_name(int id) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 58b2d610aadb..e8fe36b10d20 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -40,7 +40,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } -int arch_get_runtimeparam(struct pmu_event *pe) +int arch_get_runtimeparam(const struct pmu_event *pe) { int count; char path[PATH_MAX] = "/devices/hv_24x7/interface/"; diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 16510686c138..1a9b40ea92a5 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -113,10 +113,11 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist) struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, str, &err); if (err.str) - parse_events_print_error(&err, "tracepoint"); + parse_events_error__print(&err, "tracepoint"); + parse_events_error__exit(&err); return ret; } diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 8116a253f91f..8d07a78e742a 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -74,6 +74,8 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4), SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5), SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6), + SMPL_REG(sdar, PERF_REG_POWERPC_SDAR), + SMPL_REG(siar, PERF_REG_POWERPC_SIAR), SMPL_REG_END }; diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3018a054526a..20cd6244863b 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -45,7 +45,7 @@ static const Dwfl_Callbacks offline_callbacks = { */ static int check_return_reg(int ra_regno, Dwarf_Frame *frame) { - Dwarf_Op ops_mem[2]; + Dwarf_Op ops_mem[3]; Dwarf_Op dummy; Dwarf_Op *ops = &dummy; size_t nops; diff --git a/tools/perf/arch/riscv64/annotate/instructions.c b/tools/perf/arch/riscv64/annotate/instructions.c new file mode 100644 index 000000000000..869a0eb28953 --- /dev/null +++ b/tools/perf/arch/riscv64/annotate/instructions.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +static +struct ins_ops *riscv64__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "jal", 3) || + !strncmp(name, "jr", 2) || + !strncmp(name, "call", 4)) + ops = &call_ops; + else if (!strncmp(name, "ret", 3)) + ops = &ret_ops; + else if (name[0] == 'j' || name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int riscv64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = riscv64__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 24ea12ec7e02..305872692bfd 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -144,9 +144,32 @@ static struct ins x86__instructions[] = { { .name = "xorps", .ops = &mov_ops, }, }; -static bool x86__ins_is_fused(struct arch *arch, const char *ins1, +static bool amd__ins_is_fused(struct arch *arch, const char *ins1, const char *ins2) { + if (strstr(ins2, "jmp")) + return false; + + /* Family >= 15h supports cmp/test + branch fusion */ + if (arch->family >= 0x15 && (strstarts(ins1, "test") || + (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) { + return true; + } + + /* Family >= 19h supports some ALU + branch fusion */ + if (arch->family >= 0x19 && (strstarts(ins1, "add") || + strstarts(ins1, "sub") || strstarts(ins1, "and") || + strstarts(ins1, "inc") || strstarts(ins1, "dec") || + strstarts(ins1, "or") || strstarts(ins1, "xor"))) { + return true; + } + + return false; +} + +static bool intel__ins_is_fused(struct arch *arch, const char *ins1, + const char *ins2) +{ if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp")) return false; @@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid) if (ret == 3) { arch->family = family; arch->model = model; + arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ? + amd__ins_is_fused : + intel__ins_is_fused; return 0; } diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 2f733cdc8dbb..ac2899a25b7a 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -1,8 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> +#include <stdlib.h> #include "util/evsel.h" +#include "util/env.h" +#include "linux/string.h" void arch_evsel__set_sample_weight(struct evsel *evsel) { evsel__set_sample_bit(evsel, WEIGHT_STRUCT); } + +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) +{ + struct perf_env env = { .total_mem = 0, } ; + + if (!perf_env__cpuid(&env)) + return; + + /* + * On AMD, precise cycles event sampling internally uses IBS pmu. + * But IBS does not have filtering capabilities and perf by default + * sets exclude_guest = 1. This makes IBS pmu event init fail and + * thus perf ends up doing non-precise sampling. Avoid it by clearing + * exclude_guest. + */ + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + attr->exclude_guest = 0; + + free(env.cpuid); +} diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 83e9897c64a1..482738e9bdad 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -25,6 +25,11 @@ static int iterations = 100; static int nr_events = 1; static const char *event_string = "dummy"; +static inline u64 timeval2usec(struct timeval *tv) +{ + return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; +} + static struct record_opts opts = { .sample_time = true, .mmap_pages = UINT_MAX, @@ -73,7 +78,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) static struct evlist *bench__create_evlist(char *evstr) { - struct parse_events_error err = { .idx = 0, }; + struct parse_events_error err; struct evlist *evlist = evlist__new(); int ret; @@ -82,14 +87,16 @@ static struct evlist *bench__create_evlist(char *evstr) return NULL; } + parse_events_error__init(&err); ret = parse_events(evlist, evstr, &err); if (ret) { - parse_events_print_error(&err, evstr); + parse_events_error__print(&err, evstr); + parse_events_error__exit(&err); pr_err("Run 'perf list' for a list of valid events\n"); ret = 1; goto out_delete_evlist; } - + parse_events_error__exit(&err); ret = evlist__create_maps(evlist, &opts.target); if (ret < 0) { pr_err("Not enough memory to create thread/cpu maps\n"); @@ -167,7 +174,7 @@ static int bench_evlist_open_close__run(char *evstr) gettimeofday(&end, NULL); timersub(&end, &start, &diff); - runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + runtime_us = timeval2usec(&diff); update_stats(&time_stats, runtime_us); evlist__delete(evlist); diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index b3853aac3021..ebdc2b032afc 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -28,7 +28,7 @@ struct bench_futex_parameters { }; /** - * futex() - SYS_futex syscall wrapper + * futex_syscall() - SYS_futex syscall wrapper * @uaddr: address of first futex * @op: futex op code * @val: typically expected value of uaddr, but varies by op @@ -38,17 +38,26 @@ struct bench_futex_parameters { * @val3: varies by op * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG * - * futex() is used by all the following futex op wrappers. It can also be + * futex_syscall() is used by all the following futex op wrappers. It can also be * used for misuse and abuse testing. Generally, the specific op wrappers - * should be used instead. It is a macro instead of an static inline function as - * some of the types over overloaded (timeout is used for nr_requeue for - * example). + * should be used instead. * * These argument descriptions are the defaults for all * like-named arguments in the following wrappers except where noted below. */ -#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ - syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) +static inline int +futex_syscall(volatile u_int32_t *uaddr, int op, u_int32_t val, struct timespec *timeout, + volatile u_int32_t *uaddr2, int val3, int opflags) +{ + return syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); +} + +static inline int +futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val, int nr_requeue, + volatile u_int32_t *uaddr2, int val3, int opflags) +{ + return syscall(SYS_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); +} /** * futex_wait() - block on uaddr with optional timeout @@ -57,7 +66,7 @@ struct bench_futex_parameters { static inline int futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); } /** @@ -67,7 +76,7 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag static inline int futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) { - return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); } /** @@ -76,7 +85,7 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) static inline int futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); } /** @@ -85,7 +94,7 @@ futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) static inline int futex_unlock_pi(u_int32_t *uaddr, int opflags) { - return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); } /** @@ -97,8 +106,8 @@ static inline int futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, int nr_requeue, int opflags) { - return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, - val, opflags); + return futex_syscall_nr_requeue(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); } /** @@ -113,8 +122,8 @@ static inline int futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, - opflags); + return futex_syscall(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); } /** @@ -130,8 +139,8 @@ static inline int futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_requeue, int opflags) { - return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, - val, opflags); + return futex_syscall_nr_requeue(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, + val, opflags); } #endif /* _FUTEX_H */ diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 05f7c923c745..7401ebbac100 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -80,7 +80,7 @@ static int do_run_single_threaded(struct perf_session *session, NULL, target, threads, process_synthesized_event, - data_mmap, + true, data_mmap, nr_threads_synthesize); if (err) return err; @@ -171,7 +171,7 @@ static int do_run_multi_threaded(struct target *target, NULL, target, NULL, process_synthesized_event, - false, + true, false, nr_threads_synthesize); if (err) { perf_session__delete(session); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 05eb098cb0e3..490bb9b8cf17 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -591,6 +591,10 @@ int cmd_annotate(int argc, const char **argv) return ret; } + ret = symbol__validate_sym_arguments(); + if (ret) + return ret; + if (quiet) perf_quiet_option(); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a192014fa52b..b5c67ef73862 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2768,6 +2768,10 @@ static int perf_c2c__report(int argc, const char **argv) if (c2c.stats_only) c2c.use_stdio = true; + err = symbol__validate_sym_arguments(); + if (err) + goto out; + if (!input_name || !strlen(input_name)) input_name = "perf.data"; diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 61929f63a047..6cb3f6cc36d0 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1121,8 +1121,6 @@ static int setup_config(struct daemon *daemon) #ifndef F_TLOCK #define F_TLOCK 2 -#include <sys/file.h> - static int lockf(int fd, int cmd, off_t len) { if (cmd != F_TLOCK || len != 0) @@ -1403,8 +1401,10 @@ out: static int send_cmd_list(struct daemon *daemon) { - union cmd cmd = { .cmd = CMD_LIST, }; + union cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.list.cmd = CMD_LIST; cmd.list.verbose = verbose; cmd.list.csv_sep = daemon->csv_sep ? *daemon->csv_sep : 0; @@ -1432,6 +1432,7 @@ static int __cmd_signal(struct daemon *daemon, struct option parent_options[], return -1; } + memset(&cmd, 0, sizeof(cmd)); cmd.signal.cmd = CMD_SIGNAL, cmd.signal.sig = SIGUSR2; strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1); @@ -1446,7 +1447,7 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[], OPT_PARENT(parent_options), OPT_END() }; - union cmd cmd = { .cmd = CMD_STOP, }; + union cmd cmd; argc = parse_options(argc, argv, start_options, daemon_usage, 0); if (argc) @@ -1457,6 +1458,8 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[], return -1; } + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = CMD_STOP; return send_cmd(daemon, &cmd); } @@ -1470,7 +1473,7 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[], OPT_PARENT(parent_options), OPT_END() }; - union cmd cmd = { .cmd = CMD_PING, }; + union cmd cmd; argc = parse_options(argc, argv, ping_options, daemon_usage, 0); if (argc) @@ -1481,6 +1484,8 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[], return -1; } + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = CMD_PING; scnprintf(cmd.ping.name, sizeof(cmd.ping.name), "%s", name); return send_cmd(daemon, &cmd); } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6ad191e731fc..bc5259db5fd9 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -815,7 +815,8 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; inject->tool.aux = perf_event__drop_aux; - inject->tool.itrace_start = perf_event__drop_aux, + inject->tool.itrace_start = perf_event__drop_aux; + inject->tool.aux_output_hw_id = perf_event__drop_aux; inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ @@ -882,6 +883,7 @@ int cmd_inject(int argc, const char **argv) .lost_samples = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, + .aux_output_hw_id = perf_event__repipe, .context_switch = perf_event__repipe, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, @@ -938,6 +940,10 @@ int cmd_inject(int argc, const char **argv) #endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, + "don't load vmlinux even if found"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), @@ -972,6 +978,9 @@ int cmd_inject(int argc, const char **argv) return -1; } + if (symbol__validate_sym_arguments()) + return -1; + if (inject.in_place_update) { if (!strcmp(inject.input_name, "-")) { pr_err("Input file name required for in-place updating\n"); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index aa1b127ffb5b..c6f352ee57e6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1456,7 +1456,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, perf_session__set_id_hdr_size(kvm->session); ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, - kvm->evlist->core.threads, false, 1); + kvm->evlist->core.threads, true, false, 1); err = kvm_live_open_events(kvm); if (err) goto out; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 10ab5e40a34f..468958154ed9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -12,6 +12,7 @@ #include "util/parse-events.h" #include "util/pmu.h" +#include "util/pmu-hybrid.h" #include "util/debug.h" #include "util/metricgroup.h" #include <subcmd/pager.h> @@ -20,13 +21,15 @@ static bool desc_flag = true; static bool details_flag; +static const char *hybrid_type; int cmd_list(int argc, const char **argv) { - int i; + int i, ret = 0; bool raw_dump = false; bool long_desc_flag = false; bool deprecated = false; + char *pmu_name = NULL; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -37,6 +40,9 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &deprecated, "Print deprecated events."), + OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type", + "Print events applying cpu with this type for hybrid platform " + "(e.g. core or atom)"), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -56,10 +62,16 @@ int cmd_list(int argc, const char **argv) if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e):\n\n"); + if (hybrid_type) { + pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type); + if (!pmu_name) + pr_warning("WARNING: hybrid cputype is not supported!\n"); + } + if (argc == 0) { print_events(NULL, raw_dump, !desc_flag, long_desc_flag, - details_flag, deprecated); - return 0; + details_flag, deprecated, pmu_name); + goto out; } for (i = 0; i < argc; ++i) { @@ -82,25 +94,27 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, long_desc_flag, details_flag, - deprecated); + deprecated, pmu_name); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) - metricgroup__print(true, false, NULL, raw_dump, details_flag); + metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name); else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) - metricgroup__print(false, true, NULL, raw_dump, details_flag); + metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; sep_idx = sep - argv[i]; s = strdup(argv[i]); - if (s == NULL) - return -1; + if (s == NULL) { + ret = -1; + goto out; + } s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag); + metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -116,12 +130,16 @@ int cmd_list(int argc, const char **argv) print_pmu_events(s, raw_dump, !desc_flag, long_desc_flag, details_flag, - deprecated); + deprecated, + pmu_name); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag); + metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); free(s); } } - return 0; + +out: + free(pmu_name); + return ret; } diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index e1dd51f2874b..c31627af75d4 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -21,6 +21,7 @@ #include "util/build-id.h" #include "util/strlist.h" #include "util/strfilter.h" +#include "util/symbol.h" #include "util/symbol_conf.h" #include "util/debug.h" #include <subcmd/parse-options.h> @@ -629,6 +630,10 @@ __cmd_probe(int argc, const char **argv) params.command = 'a'; } + ret = symbol__validate_sym_arguments(); + if (ret) + return ret; + if (params.quiet) { if (verbose != 0) { pr_err(" Error: -v and -q are exclusive.\n"); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b3509d9d20cc..0338b813585a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1255,6 +1255,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) { int err; struct perf_thread_map *thread_map; + bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; if (rec->opts.tail_synthesize != tail) return 0; @@ -1266,6 +1267,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) err = perf_event__synthesize_thread_map(&rec->tool, thread_map, process_synthesized_event, &rec->session->machines.host, + needs_mmap, rec->opts.sample_address); perf_thread_map__put(thread_map); return err; @@ -1409,7 +1411,7 @@ static int record__synthesize(struct record *rec, bool tail) goto out; /* Synthesize id_index before auxtrace_info */ - if (rec->opts.auxtrace_sample_mode) { + if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { err = perf_event__synthesize_id_index(tool, process_synthesized_event, session->evlist, machine); @@ -1470,19 +1472,26 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); - err = perf_event__synthesize_cgroups(tool, process_synthesized_event, - machine); - if (err < 0) - pr_warning("Couldn't synthesize cgroup events.\n"); + if (rec->opts.synth & PERF_SYNTH_CGROUP) { + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, + machine); + if (err < 0) + pr_warning("Couldn't synthesize cgroup events.\n"); + } if (rec->opts.nr_threads_synthesize > 1) { perf_set_multithreaded(); f = process_locked_synthesized_event; } - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, - f, opts->sample_address, - rec->opts.nr_threads_synthesize); + if (rec->opts.synth & PERF_SYNTH_TASK) { + bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; + + err = __machine__synthesize_threads(machine, tool, &opts->target, + rec->evlist->core.threads, + f, needs_mmap, opts->sample_address, + rec->opts.nr_threads_synthesize); + } if (rec->opts.nr_threads_synthesize > 1) perf_set_singlethreaded(); @@ -2391,6 +2400,26 @@ static int process_timestamp_boundary(struct perf_tool *tool, return 0; } +static int parse_record_synth_option(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + struct record_opts *opts = opt->value; + char *p = strdup(str); + + if (p == NULL) + return -1; + + opts->synth = parse_synth_opt(p); + free(p); + + if (opts->synth < 0) { + pr_err("Invalid synth option: %s\n", str); + return -1; + } + return 0; +} + /* * XXX Ideally would be local to cmd_record() and passed to a record__new * because we need to have access to it in record__exit, that is called @@ -2416,6 +2445,7 @@ static struct record record = { .nr_threads_synthesize = 1, .ctl_fd = -1, .ctl_fd_ack = -1, + .synth = PERF_SYNTH_ALL, }, .tool = { .sample = process_sample_event, @@ -2631,6 +2661,8 @@ static struct option __record_options[] = { "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), + OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", + "Fine-tune event synthesis: default=all", parse_record_synth_option), OPT_END() }; @@ -2680,6 +2712,10 @@ int cmd_record(int argc, const char **argv) if (quiet) perf_quiet_option(); + err = symbol__validate_sym_arguments(); + if (err) + return err; + /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) rec->opts.target.system_wide = true; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a0316ce910db..8167ebfe776a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1378,18 +1378,9 @@ int cmd_report(int argc, const char **argv) if (quiet) perf_quiet_option(); - if (symbol_conf.vmlinux_name && - access(symbol_conf.vmlinux_name, R_OK)) { - pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); - ret = -EINVAL; - goto exit; - } - if (symbol_conf.kallsyms_name && - access(symbol_conf.kallsyms_name, R_OK)) { - pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); - ret = -EINVAL; + ret = symbol__validate_sym_arguments(); + if (ret) goto exit; - } if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 635a6b5a9ec9..4527f632ebe4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3538,6 +3538,7 @@ int cmd_sched(int argc, const char **argv) .fork_event = replay_fork_event, }; unsigned int i; + int ret; for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) sched.curr_pid[i] = -1; @@ -3598,6 +3599,9 @@ int cmd_sched(int argc, const char **argv) parse_options_usage(NULL, timehist_options, "n", true); return -EINVAL; } + ret = symbol__validate_sym_arguments(); + if (ret) + return ret; return perf_sched__timehist(&sched); } else { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6211d0b84b7a..9434367af166 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -122,6 +122,7 @@ enum perf_output_field { PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, + PERF_OUTPUT_INS_LAT = 1ULL << 35, }; struct perf_script { @@ -188,6 +189,7 @@ struct output_option { {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, + {.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT}, }; enum { @@ -262,7 +264,8 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE | + PERF_OUTPUT_INS_LAT, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -459,7 +462,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && @@ -522,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE)) return -EINVAL; + if (PRINT_FIELD(INS_LAT) && + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT)) + return -EINVAL; + return 0; } @@ -2039,6 +2046,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(WEIGHT)) fprintf(fp, "%16" PRIu64, sample->weight); + if (PRINT_FIELD(INS_LAT)) + fprintf(fp, "%16" PRIu16, sample->ins_lat); + if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -3715,7 +3725,7 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size,code_page_size", + "data_page_size,code_page_size,ins_lat", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), @@ -3836,6 +3846,9 @@ int cmd_script(int argc, const char **argv) data.path = input_name; data.force = symbol_conf.force; + if (symbol__validate_sym_arguments()) + return -1; + if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) @@ -4039,11 +4052,15 @@ script_found: goto out_delete; uname(&uts); - if (data.is_pipe || /* assume pipe_mode indicates native_arch */ - !strcmp(uts.machine, session->header.env.arch) || - (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386"))) + if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; + } else if (session->header.env.arch) { + if (!strcmp(uts.machine, session->header.env.arch)) + native_arch = true; + else if (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386")) + native_arch = true; + } script.session = session; script__setup_sample_type(&script); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f0ecfda34ece..7974933dbc77 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1750,14 +1750,12 @@ static int add_default_attributes(void) (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; - struct parse_events_error errinfo; - /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) return 0; - bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { + struct parse_events_error errinfo; /* Handle -T as -M transaction. Once platform specific metrics * support has been added to the json files, all architectures * will use this approach. To determine transaction support @@ -1772,6 +1770,7 @@ static int add_default_attributes(void) &stat_config.metric_events); } + parse_events_error__init(&errinfo); if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, @@ -1782,13 +1781,14 @@ static int add_default_attributes(void) &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); - parse_events_print_error(&errinfo, transaction_attrs); - return -1; + parse_events_error__print(&errinfo, transaction_attrs); } - return 0; + parse_events_error__exit(&errinfo); + return err ? -1 : 0; } if (smi_cost) { + struct parse_events_error errinfo; int smi; if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { @@ -1804,23 +1804,23 @@ static int add_default_attributes(void) smi_reset = true; } - if (pmu_have_event("msr", "aperf") && - pmu_have_event("msr", "smi")) { - if (!force_metric_only) - stat_config.metric_only = true; - err = parse_events(evsel_list, smi_cost_attrs, &errinfo); - } else { + if (!pmu_have_event("msr", "aperf") || + !pmu_have_event("msr", "smi")) { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); - parse_events_print_error(&errinfo, smi_cost_attrs); return -1; } + if (!force_metric_only) + stat_config.metric_only = true; + + parse_events_error__init(&errinfo); + err = parse_events(evsel_list, smi_cost_attrs, &errinfo); if (err) { - parse_events_print_error(&errinfo, smi_cost_attrs); + parse_events_error__print(&errinfo, smi_cost_attrs); fprintf(stderr, "Cannot set up SMI cost events\n"); - return -1; } - return 0; + parse_events_error__exit(&errinfo); + return err ? -1 : 0; } if (topdown_run) { @@ -1875,18 +1875,22 @@ static int add_default_attributes(void) return -1; } if (topdown_attrs[0] && str) { + struct parse_events_error errinfo; if (warn) arch_topdown_group_warn(); setup_metrics: + parse_events_error__init(&errinfo); err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); - parse_events_print_error(&errinfo, str); + parse_events_error__print(&errinfo, str); + parse_events_error__exit(&errinfo); free(str); return -1; } + parse_events_error__exit(&errinfo); } else { fprintf(stderr, "System does not support topdown\n"); return -1; @@ -1896,6 +1900,7 @@ setup_metrics: if (!evsel_list->core.nr_entries) { if (perf_pmu__has_hybrid()) { + struct parse_events_error errinfo; const char *hybrid_str = "cycles,instructions,branches,branch-misses"; if (target__has_cpu(&target)) @@ -1906,15 +1911,16 @@ setup_metrics: return -1; } + parse_events_error__init(&errinfo); err = parse_events(evsel_list, hybrid_str, &errinfo); if (err) { fprintf(stderr, "Cannot set up hybrid events %s: %d\n", hybrid_str, err); - parse_events_print_error(&errinfo, hybrid_str); - return -1; + parse_events_error__print(&errinfo, hybrid_str); } - return err; + parse_events_error__exit(&errinfo); + return err ? -1 : 0; } if (target__has_cpu(&target)) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a3ae9176a83e..1fc390f136dd 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1271,7 +1271,7 @@ static int __cmd_top(struct perf_top *top) pr_debug("Couldn't synthesize cgroup events.\n"); machine__synthesize_threads(&top->session->machines.host, &opts->target, - top->evlist->core.threads, false, + top->evlist->core.threads, true, false, top->nr_threads_synthesize); if (top->nr_threads_synthesize > 1) @@ -1618,6 +1618,10 @@ int cmd_top(int argc, const char **argv) if (argc) usage_with_options(top_usage, options); + status = symbol__validate_sym_arguments(); + if (status) + goto out_delete_evlist; + if (annotate_check_args(&top.annotation_opts) < 0) goto out_delete_evlist; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2bf21194c7b3..624ea12ce5ca 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1628,8 +1628,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist) goto out; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, - evlist->core.threads, trace__tool_process, false, - 1); + evlist->core.threads, trace__tool_process, + true, false, 1); out: if (err) symbol__exit(); @@ -3063,15 +3063,11 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (ret) { - free(err.str); - free(err.help); - free(err.first_str); - free(err.first_help); + parse_events_error__exit(&err); + if (ret) return false; - } evlist__for_each_entry_safe(evlist, evsel, tmp) { if (!strstarts(evsel__name(evsel), "probe:vfs_getname")) @@ -4925,12 +4921,13 @@ int cmd_trace(int argc, const char **argv) if (trace.perfconfig_events != NULL) { struct parse_events_error parse_err; - bzero(&parse_err, sizeof(parse_err)); + parse_events_error__init(&parse_err); err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); - if (err) { - parse_events_print_error(&parse_err, trace.perfconfig_events); + if (err) + parse_events_error__print(&parse_err, trace.perfconfig_events); + parse_events_error__exit(&parse_err); + if (err) goto out; - } } if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index f1e46277e822..30ecf3a0f68b 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -26,6 +26,7 @@ include/vdso/bits.h include/linux/const.h include/vdso/const.h include/linux/hash.h +include/linux/list-sort.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h arch/x86/include/asm/required-features.h @@ -150,6 +151,7 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' +check lib/list_sort.c '-I "^#include <linux/bug.h>"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/dlfilters/dlfilter-show-cycles.c b/tools/perf/dlfilters/dlfilter-show-cycles.c new file mode 100644 index 000000000000..9eccc97bff82 --- /dev/null +++ b/tools/perf/dlfilters/dlfilter-show-cycles.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dlfilter-show-cycles.c: Print the number of cycles at the start of each line + * Copyright (c) 2021, Intel Corporation. + */ +#include <perf/perf_dlfilter.h> +#include <string.h> +#include <stdio.h> + +#define MAX_CPU 4096 + +enum { + INSTR_CYC, + BRNCH_CYC, + OTHER_CYC, + MAX_ENTRY +}; + +static __u64 cycles[MAX_CPU][MAX_ENTRY]; +static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY]; + +#define BITS 16 +#define TABLESZ (1 << BITS) +#define TABLEMAX (TABLESZ / 2) +#define MASK (TABLESZ - 1) + +static struct entry { + __u32 used; + __s32 tid; + __u64 cycles[MAX_ENTRY]; + __u64 cycles_rpt[MAX_ENTRY]; +} table[TABLESZ]; + +static int tid_cnt; + +static int event_entry(const char *event) +{ + if (!event) + return OTHER_CYC; + if (!strncmp(event, "instructions", 12)) + return INSTR_CYC; + if (!strncmp(event, "branches", 8)) + return BRNCH_CYC; + return OTHER_CYC; +} + +static struct entry *find_entry(__s32 tid) +{ + __u32 pos = tid & MASK; + struct entry *e; + + e = &table[pos]; + while (e->used) { + if (e->tid == tid) + return e; + if (++pos == TABLESZ) + pos = 0; + e = &table[pos]; + } + + if (tid_cnt >= TABLEMAX) { + fprintf(stderr, "Too many threads\n"); + return NULL; + } + + tid_cnt += 1; + e->used = 1; + e->tid = tid; + return e; +} + +static void add_entry(__s32 tid, int pos, __u64 cnt) +{ + struct entry *e = find_entry(tid); + + if (e) + e->cycles[pos] += cnt; +} + +int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + __s32 cpu = sample->cpu; + __s32 tid = sample->tid; + int pos; + + if (!sample->cyc_cnt) + return 0; + + pos = event_entry(sample->event); + + if (cpu >= 0 && cpu < MAX_CPU) + cycles[cpu][pos] += sample->cyc_cnt; + else if (tid != -1) + add_entry(tid, pos, sample->cyc_cnt); + return 0; +} + +static void print_vals(__u64 cycles, __u64 delta) +{ + if (delta) + printf("%10llu %10llu ", cycles, delta); + else + printf("%10llu %10s ", cycles, ""); +} + +int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + __s32 cpu = sample->cpu; + __s32 tid = sample->tid; + int pos; + + pos = event_entry(sample->event); + + if (cpu >= 0 && cpu < MAX_CPU) { + print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]); + cycles_rpt[cpu][pos] = cycles[cpu][pos]; + return 0; + } + + if (tid != -1) { + struct entry *e = find_entry(tid); + + if (e) { + print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]); + e->cycles_rpt[pos] = e->cycles[pos]; + return 0; + } + } + + printf("%22s", ""); + return 0; +} + +const char *filter_description(const char **long_description) +{ + static char *long_desc = "Cycle counts are accumulated per CPU (or " + "per thread if CPU is not recorded) from IPC information, and " + "printed together with the change since the last print, at the " + "start of each line. Separate counts are kept for branches, " + "instructions or other events."; + + *long_description = long_desc; + return "Print the number of cycles at the start of each line"; +} diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json index 9bea1ba1c4d2..cf48d0dfc759 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json @@ -18,6 +18,6 @@ "ArchStdEvent": "BUS_ACCESS_PERIPH" }, { - "ArchStdEvent": "BUS_ACCESS", + "ArchStdEvent": "BUS_ACCESS" } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json index 1e25f2ae4ae0..4cc50b7da526 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json @@ -39,31 +39,31 @@ "ArchStdEvent": "L2D_CACHE_INVAL" }, { - "ArchStdEvent": "L1I_CACHE_REFILL", + "ArchStdEvent": "L1I_CACHE_REFILL" }, { - "ArchStdEvent": "L1I_TLB_REFILL", + "ArchStdEvent": "L1I_TLB_REFILL" }, { - "ArchStdEvent": "L1D_CACHE_REFILL", + "ArchStdEvent": "L1D_CACHE_REFILL" }, { - "ArchStdEvent": "L1D_CACHE", + "ArchStdEvent": "L1D_CACHE" }, { - "ArchStdEvent": "L1D_TLB_REFILL", + "ArchStdEvent": "L1D_TLB_REFILL" }, { - "ArchStdEvent": "L1I_CACHE", + "ArchStdEvent": "L1I_CACHE" }, { - "ArchStdEvent": "L2D_CACHE", + "ArchStdEvent": "L2D_CACHE" }, { - "ArchStdEvent": "L2D_CACHE_REFILL", + "ArchStdEvent": "L2D_CACHE_REFILL" }, { - "ArchStdEvent": "L2D_CACHE_WB", + "ArchStdEvent": "L2D_CACHE_WB" }, { "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB", @@ -72,7 +72,7 @@ }, { "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB", - "ArchStdEvent": "L1I_TLB", + "ArchStdEvent": "L1I_TLB" }, { "PublicDescription": "Level 2 access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json index 9076ca2daf9e..927a6f629a03 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json @@ -1,7 +1,7 @@ [ { "PublicDescription": "The number of core clock cycles", - "ArchStdEvent": "CPU_CYCLES", + "ArchStdEvent": "CPU_CYCLES" }, { "PublicDescription": "FSU clocking gated off cycle", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json index 9761433ad329..ada052e19632 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json @@ -36,9 +36,9 @@ "ArchStdEvent": "EXC_TRAP_FIQ" }, { - "ArchStdEvent": "EXC_TAKEN", + "ArchStdEvent": "EXC_TAKEN" }, { - "ArchStdEvent": "EXC_RETURN", + "ArchStdEvent": "EXC_RETURN" } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json index 482aa3f19e58..62f6276e3016 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json @@ -44,25 +44,25 @@ "BriefDescription": "Software increment" }, { - "ArchStdEvent": "INST_RETIRED", + "ArchStdEvent": "INST_RETIRED" }, { "ArchStdEvent": "CID_WRITE_RETIRED", "BriefDescription": "Write to CONTEXTIDR" }, { - "ArchStdEvent": "INST_SPEC", + "ArchStdEvent": "INST_SPEC" }, { - "ArchStdEvent": "TTBR_WRITE_RETIRED", + "ArchStdEvent": "TTBR_WRITE_RETIRED" }, { "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches", - "ArchStdEvent": "BR_RETIRED", + "ArchStdEvent": "BR_RETIRED" }, { "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush", - "ArchStdEvent": "BR_MIS_PRED_RETIRED", + "ArchStdEvent": "BR_MIS_PRED_RETIRED" }, { "PublicDescription": "Operation speculatively executed, NOP", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json index 2e7555696caf..50157e8c2005 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json @@ -15,10 +15,10 @@ "ArchStdEvent": "UNALIGNED_LDST_SPEC" }, { - "ArchStdEvent": "MEM_ACCESS", + "ArchStdEvent": "MEM_ACCESS" }, { "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "ArchStdEvent": "MEMORY_ERROR", + "ArchStdEvent": "MEMORY_ERROR" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json index ec0dc92288ab..db68de188390 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json @@ -1,10 +1,10 @@ [ { "PublicDescription": "This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken", - "ArchStdEvent": "BR_MIS_PRED", + "ArchStdEvent": "BR_MIS_PRED" }, { "PublicDescription": "This event counts all predictable branches.", - "ArchStdEvent": "BR_PRED", + "ArchStdEvent": "BR_PRED" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json index 6263929efce2..e0875d3a685d 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json @@ -1,21 +1,21 @@ [ { - "PublicDescription": "The number of core clock cycles" + "PublicDescription": "The number of core clock cycles", "ArchStdEvent": "CPU_CYCLES", "BriefDescription": "The number of core clock cycles." }, { "PublicDescription": "This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", - "ArchStdEvent": "BUS_ACCESS", + "ArchStdEvent": "BUS_ACCESS" }, { - "PublicDescription": "This event duplicates CPU_CYCLES." - "ArchStdEvent": "BUS_CYCLES", + "PublicDescription": "This event duplicates CPU_CYCLES.", + "ArchStdEvent": "BUS_CYCLES" }, { - "ArchStdEvent": "BUS_ACCESS_RD", + "ArchStdEvent": "BUS_ACCESS_RD" }, { - "ArchStdEvent": "BUS_ACCESS_WR", + "ArchStdEvent": "BUS_ACCESS_WR" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json index cd67bb9df139..fc448c2d5ea4 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json @@ -1,47 +1,47 @@ [ { "PublicDescription": "This event counts any instruction fetch which misses in the cache.", - "ArchStdEvent": "L1I_CACHE_REFILL", + "ArchStdEvent": "L1I_CACHE_REFILL" }, { "PublicDescription": "This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "ArchStdEvent": "L1I_TLB_REFILL", + "ArchStdEvent": "L1I_TLB_REFILL" }, { "PublicDescription": "This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", - "ArchStdEvent": "L1D_CACHE_REFILL", + "ArchStdEvent": "L1D_CACHE_REFILL" }, { "PublicDescription": "This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", - "ArchStdEvent": "L1D_CACHE", + "ArchStdEvent": "L1D_CACHE" }, { "PublicDescription": "This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "ArchStdEvent": "L1D_TLB_REFILL", + "ArchStdEvent": "L1D_TLB_REFILL" }, - {, + { "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.", - "ArchStdEvent": "L1I_CACHE", + "ArchStdEvent": "L1I_CACHE" }, { "PublicDescription": "This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", - "ArchStdEvent": "L1D_CACHE_WB", + "ArchStdEvent": "L1D_CACHE_WB" }, { "PublicDescription": "This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", - "ArchStdEvent": "L2D_CACHE", + "ArchStdEvent": "L2D_CACHE" }, { "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted", - "ArchStdEvent": "L2D_CACHE_REFILL", + "ArchStdEvent": "L2D_CACHE_REFILL" }, { "PublicDescription": "This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", - "ArchStdEvent": "L2D_CACHE_WB", + "ArchStdEvent": "L2D_CACHE_WB" }, { "PublicDescription": "This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", - "ArchStdEvent": "L2D_CACHE_ALLOCATE", + "ArchStdEvent": "L2D_CACHE_ALLOCATE" }, { "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", @@ -75,21 +75,21 @@ }, { "PublicDescription": "This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.", - "ArchStdEvent": "L2D_TLB", + "ArchStdEvent": "L2D_TLB" }, { "PublicDescription": "This event counts on any data access which causes L2D_TLB_REFILL to count.", - "ArchStdEvent": "DTLB_WALK", + "ArchStdEvent": "DTLB_WALK" }, { "PublicDescription": "This event counts on any instruction access which causes L2D_TLB_REFILL to count.", - "ArchStdEvent": "ITLB_WALK", + "ArchStdEvent": "ITLB_WALK" }, { - "ArchStdEvent": "LL_CACHE_RD", + "ArchStdEvent": "LL_CACHE_RD" }, { - "ArchStdEvent": "LL_CACHE_MISS_RD", + "ArchStdEvent": "LL_CACHE_MISS_RD" }, { "ArchStdEvent": "L1D_CACHE_INVAL" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json index ea4631db41b5..ce942324ee60 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json @@ -1,10 +1,10 @@ [ { - "ArchStdEvent": "EXC_TAKEN", + "ArchStdEvent": "EXC_TAKEN" }, { "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "ArchStdEvent": "MEMORY_ERROR", + "ArchStdEvent": "MEMORY_ERROR" }, { "ArchStdEvent": "EXC_DABORT" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json index 8e59566cba8b..b0b439a36ae9 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json @@ -1,32 +1,32 @@ [ { - "ArchStdEvent": "SW_INCR", + "ArchStdEvent": "SW_INCR" }, { "PublicDescription": "This event counts all retired instructions, including those that fail their condition check.", - "ArchStdEvent": "INST_RETIRED", + "ArchStdEvent": "INST_RETIRED" }, { - "ArchStdEvent": "EXC_RETURN", + "ArchStdEvent": "EXC_RETURN" }, { "PublicDescription": "This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", - "ArchStdEvent": "CID_WRITE_RETIRED", + "ArchStdEvent": "CID_WRITE_RETIRED" }, { - "ArchStdEvent": "INST_SPEC", + "ArchStdEvent": "INST_SPEC" }, { "PublicDescription": "This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", - "ArchStdEvent": "TTBR_WRITE_RETIRED", + "ArchStdEvent": "TTBR_WRITE_RETIRED" }, - {, + { "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.", - "ArchStdEvent": "BR_RETIRED", + "ArchStdEvent": "BR_RETIRED" }, { "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", - "ArchStdEvent": "BR_MIS_PRED_RETIRED", + "ArchStdEvent": "BR_MIS_PRED_RETIRED" }, { "ArchStdEvent": "ASE_SPEC" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json index f06f399051c1..20a929e7728d 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json @@ -1,7 +1,7 @@ [ { "PublicDescription": "This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", - "ArchStdEvent": "MEM_ACCESS", + "ArchStdEvent": "MEM_ACCESS" }, { "ArchStdEvent": "MEM_ACCESS_RD" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json index c2ccbf6fbfa0..20d8365756c5 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json @@ -1,5 +1,5 @@ [ { - "ArchStdEvent": "REMOTE_ACCESS", + "ArchStdEvent": "REMOTE_ACCESS" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json index d79f0aeaf7f1..b4e96551d51a 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json @@ -1,10 +1,10 @@ [ { "PublicDescription": "The counter counts on any cycle when there are no fetched instructions available to dispatch.", - "ArchStdEvent": "STALL_FRONTEND", + "ArchStdEvent": "STALL_FRONTEND" }, { "PublicDescription": "The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", - "ArchStdEvent": "STALL_BACKEND", + "ArchStdEvent": "STALL_BACKEND" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json new file mode 100644 index 000000000000..79f2016c53b0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json new file mode 100644 index 000000000000..579c1c993d17 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json new file mode 100644 index 000000000000..0141f749bff3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json @@ -0,0 +1,155 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L3D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L3D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L3D_CACHE" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "LL_CACHE_RD" + }, + { + "ArchStdEvent": "LL_CACHE_MISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L3D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json new file mode 100644 index 000000000000..344a2d552ad5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json new file mode 100644 index 000000000000..25825e14c535 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json @@ -0,0 +1,89 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json new file mode 100644 index 000000000000..e3d08f1f7c92 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json new file mode 100644 index 000000000000..20d8365756c5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "REMOTE_ACCESS" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json new file mode 100644 index 000000000000..f9fae15f7555 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json index 913fb200ea52..423767510aff 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -258,6 +258,78 @@ "BriefDescription": "Last level cache miss, read" }, { + "PublicDescription": "Level 1 data cache long-latency read miss. The counter counts each memory read access counted by L1D_CACHE that incurs additional latency because it returns data from outside the Level 1 data or unified cache of this processing element.", + "EventCode": "0x39", + "EventName": "L1D_CACHE_LMISS_RD", + "BriefDescription": "Level 1 data cache long-latency read miss" + }, + { + "PublicDescription": "Micro-operation architecturally executed. The counter counts each operation counted by OP_SPEC that would be executed in a simple sequential execution of the program.", + "EventCode": "0x3A", + "EventName": "OP_RETIRED", + "BriefDescription": "Micro-operation architecturally executed" + }, + { + "PublicDescription": "Micro-operation speculatively executed. The counter counts the number of operations executed by the processing element, including those that are executed speculatively and would not be executed in a simple sequential execution of the program.", + "EventCode": "0x3B", + "EventName": "OP_SPEC", + "BriefDescription": "Micro-operation speculatively executed" + }, + { + "PublicDescription": "No operation sent for execution. The counter counts every attributable cycle on which no attributable instruction or operation was sent for execution on this processing element.", + "EventCode": "0x3C", + "EventName": "STALL", + "BriefDescription": "No operation sent for execution" + }, + { + "PublicDescription": "No operation sent for execution on a slot due to the backend. Counts each slot counted by STALL_SLOT where no attributable instruction or operation was sent for execution because the backend is unable to accept it.", + "EventCode": "0x3D", + "EventName": "STALL_SLOT_BACKEND", + "BriefDescription": "No operation sent for execution on a slot due to the backend" + }, + { + "PublicDescription": "No operation sent for execution on a slot due to the frontend. Counts each slot counted by STALL_SLOT where no attributable instruction or operation was sent for execution because there was no attributable instruction or operation available to issue from the processing element from the frontend for the slot.", + "EventCode": "0x3E", + "EventName": "STALL_SLOT_FRONTEND", + "BriefDescription": "No operation sent for execution on a slot due to the frontend" + }, + { + "PublicDescription": "No operation sent for execution on a slot. The counter counts on each attributable cycle the number of instruction or operation slots that were not occupied by an instruction or operation attributable to the processing element.", + "EventCode": "0x3F", + "EventName": "STALL_SLOT", + "BriefDescription": "No operation sent for execution on a slot" + }, + { + "PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.", + "EventCode": "0x4004", + "EventName": "CNT_CYCLES", + "BriefDescription": "Constant frequency cycles" + }, + { + "PublicDescription": "Memory stall cycles. The counter counts each cycle counted by STALL_BACKEND where there is a cache miss in the last level of cache within the processing element clock domain", + "EventCode": "0x4005", + "EventName": "STALL_BACKEND_MEM", + "BriefDescription": "Memory stall cycles" + }, + { + "PublicDescription": "Level 1 instruction cache long-latency read miss. If the L1I_CACHE_RD event is implemented, the counter counts each access counted by L1I_CACHE_RD that incurs additional latency because it returns instructions from outside of the Level 1 instruction cache of this PE. If the L1I_CACHE_RD event is not implemented, the counter counts each access counted by L1I_CACHE that incurs additional latency because it returns instructions from outside the Level 1 instruction cache of this PE. The event indicates to software that the access missed in the Level 1 instruction cache and might have a significant performance impact due to the additional latency, compared to the latency of an access that hits in the Level 1 instruction cache.", + "EventCode": "0x4006", + "EventName": "L1I_CACHE_LMISS", + "BriefDescription": "Level 1 instruction cache long-latency read miss" + }, + { + "PublicDescription": "Level 2 data cache long-latency read miss. The counter counts each memory read access counted by L2D_CACHE that incurs additional latency because it returns data from outside the Level 2 data or unified cache of this processing element. The event indicates to software that the access missed in the Level 2 data or unified cache and might have a significant performance impact compared to the latency of an access that hits in the Level 2 data or unified cache.", + "EventCode": "0x4009", + "EventName": "L2D_CACHE_LMISS_RD", + "BriefDescription": "Level 2 data cache long-latency read miss" + }, + { + "PublicDescription": "Level 3 data cache long-latency read miss. The counter counts each memory read access counted by L3D_CACHE that incurs additional latency because it returns data from outside the Level 3 data or unified cache of this processing element. The event indicates to software that the access missed in the Level 3 data or unified cache and might have a significant performance impact compared to the latency of an access that hits in the Level 3 data or unified cache.", + "EventCode": "0x400B", + "EventName": "L3D_CACHE_LMISS_RD", + "BriefDescription": "Level 3 data cache long-latency read miss" + }, + { "PublicDescription": "SIMD Instruction architecturally executed.", "EventCode": "0x8000", "EventName": "SIMD_INST_RETIRED", diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index dda8e59149d2..6970203cb247 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -229,5 +229,5 @@ "BriefDescription": "Store bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "store_bound" - }, + } ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json index 61514d38601b..2b3cb55df288 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -1,56 +1,56 @@ [ { - "EventCode": "0x00", - "EventName": "uncore_hisi_ddrc.flux_wr", + "ConfigCode": "0x00", + "EventName": "flux_wr", "BriefDescription": "DDRC total write operations", "PublicDescription": "DDRC total write operations", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x01", - "EventName": "uncore_hisi_ddrc.flux_rd", + "ConfigCode": "0x01", + "EventName": "flux_rd", "BriefDescription": "DDRC total read operations", "PublicDescription": "DDRC total read operations", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x02", - "EventName": "uncore_hisi_ddrc.flux_wcmd", + "ConfigCode": "0x02", + "EventName": "flux_wcmd", "BriefDescription": "DDRC write commands", "PublicDescription": "DDRC write commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x03", - "EventName": "uncore_hisi_ddrc.flux_rcmd", + "ConfigCode": "0x03", + "EventName": "flux_rcmd", "BriefDescription": "DDRC read commands", "PublicDescription": "DDRC read commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x04", - "EventName": "uncore_hisi_ddrc.pre_cmd", + "ConfigCode": "0x04", + "EventName": "pre_cmd", "BriefDescription": "DDRC precharge commands", "PublicDescription": "DDRC precharge commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x05", - "EventName": "uncore_hisi_ddrc.act_cmd", + "ConfigCode": "0x05", + "EventName": "act_cmd", "BriefDescription": "DDRC active commands", "PublicDescription": "DDRC active commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x06", - "EventName": "uncore_hisi_ddrc.rnk_chg", + "ConfigCode": "0x06", + "EventName": "rnk_chg", "BriefDescription": "DDRC rank commands", "PublicDescription": "DDRC rank commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x07", - "EventName": "uncore_hisi_ddrc.rw_chg", + "ConfigCode": "0x07", + "EventName": "rw_chg", "BriefDescription": "DDRC read and write changes", "PublicDescription": "DDRC read and write changes", "Unit": "hisi_sccl,ddrc" diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json index ada86782933f..9a7ec7af2060 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -1,72 +1,152 @@ [ { - "EventCode": "0x00", - "EventName": "uncore_hisi_hha.rx_ops_num", + "ConfigCode": "0x00", + "EventName": "rx_ops_num", "BriefDescription": "The number of all operations received by the HHA", "PublicDescription": "The number of all operations received by the HHA", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x01", - "EventName": "uncore_hisi_hha.rx_outer", + "ConfigCode": "0x01", + "EventName": "rx_outer", "BriefDescription": "The number of all operations received by the HHA from another socket", "PublicDescription": "The number of all operations received by the HHA from another socket", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x02", - "EventName": "uncore_hisi_hha.rx_sccl", + "ConfigCode": "0x02", + "EventName": "rx_sccl", "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x03", - "EventName": "uncore_hisi_hha.rx_ccix", + "ConfigCode": "0x03", + "EventName": "rx_ccix", "BriefDescription": "Count of the number of operations that HHA has received from CCIX", "PublicDescription": "Count of the number of operations that HHA has received from CCIX", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1c", - "EventName": "uncore_hisi_hha.rd_ddr_64b", + "ConfigCode": "0x4", + "EventName": "rx_wbi", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x5", + "EventName": "rx_wbip", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x11", + "EventName": "rx_wtistash", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x1c", + "EventName": "rd_ddr_64b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1d", - "EventName": "uncore_hisi_hha.wr_ddr_64b", + "ConfigCode": "0x1d", + "EventName": "wr_ddr_64b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1e", - "EventName": "uncore_hisi_hha.rd_ddr_128b", + "ConfigCode": "0x1e", + "EventName": "rd_ddr_128b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1f", - "EventName": "uncore_hisi_hha.wr_ddr_128b", + "ConfigCode": "0x1f", + "EventName": "wr_ddr_128b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x20", - "EventName": "uncore_hisi_hha.spill_num", + "ConfigCode": "0x20", + "EventName": "spill_num", "BriefDescription": "Count of the number of spill operations that the HHA has sent", "PublicDescription": "Count of the number of spill operations that the HHA has sent", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x21", - "EventName": "uncore_hisi_hha.spill_success", + "ConfigCode": "0x21", + "EventName": "spill_success", "BriefDescription": "Count of the number of successful spill operations that the HHA has sent", "PublicDescription": "Count of the number of successful spill operations that the HHA has sent", "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x23", + "EventName": "bi_num", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x32", + "EventName": "mediated_num", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x33", + "EventName": "tx_snp_num", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x34", + "EventName": "tx_snp_outer", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x35", + "EventName": "tx_snp_ccix", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x38", + "EventName": "rx_snprspdata", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x3c", + "EventName": "rx_snprsp_outer", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x40", + "EventName": "sdir-lookup", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x41", + "EventName": "edir-lookup", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x42", + "EventName": "sdir-hit", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x43", + "EventName": "edir-hit", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x4c", + "EventName": "sdir-home-migrate", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x4d", + "EventName": "edir-home-migrate", + "Unit": "hisi_sccl,hha" } ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json index 67ab19e8cf3a..e3479b65be9a 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -1,91 +1,91 @@ [ { - "EventCode": "0x00", - "EventName": "uncore_hisi_l3c.rd_cpipe", + "ConfigCode": "0x00", + "EventName": "rd_cpipe", "BriefDescription": "Total read accesses", "PublicDescription": "Total read accesses", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x01", - "EventName": "uncore_hisi_l3c.wr_cpipe", + "ConfigCode": "0x01", + "EventName": "wr_cpipe", "BriefDescription": "Total write accesses", "PublicDescription": "Total write accesses", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x02", - "EventName": "uncore_hisi_l3c.rd_hit_cpipe", + "ConfigCode": "0x02", + "EventName": "rd_hit_cpipe", "BriefDescription": "Total read hits", "PublicDescription": "Total read hits", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x03", - "EventName": "uncore_hisi_l3c.wr_hit_cpipe", + "ConfigCode": "0x03", + "EventName": "wr_hit_cpipe", "BriefDescription": "Total write hits", "PublicDescription": "Total write hits", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x04", - "EventName": "uncore_hisi_l3c.victim_num", + "ConfigCode": "0x04", + "EventName": "victim_num", "BriefDescription": "l3c precharge commands", "PublicDescription": "l3c precharge commands", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x20", - "EventName": "uncore_hisi_l3c.rd_spipe", + "ConfigCode": "0x20", + "EventName": "rd_spipe", "BriefDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", "PublicDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x21", - "EventName": "uncore_hisi_l3c.wr_spipe", + "ConfigCode": "0x21", + "EventName": "wr_spipe", "BriefDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", "PublicDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x22", - "EventName": "uncore_hisi_l3c.rd_hit_spipe", + "ConfigCode": "0x22", + "EventName": "rd_hit_spipe", "BriefDescription": "Count of the number of read lines that hits in spipe of this L3C", "PublicDescription": "Count of the number of read lines that hits in spipe of this L3C", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x23", - "EventName": "uncore_hisi_l3c.wr_hit_spipe", + "ConfigCode": "0x23", + "EventName": "wr_hit_spipe", "BriefDescription": "Count of the number of write lines that hits in spipe of this L3C", "PublicDescription": "Count of the number of write lines that hits in spipe of this L3C", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x29", - "EventName": "uncore_hisi_l3c.back_invalid", + "ConfigCode": "0x29", + "EventName": "back_invalid", "BriefDescription": "Count of the number of L3C back invalid operations", "PublicDescription": "Count of the number of L3C back invalid operations", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x40", - "EventName": "uncore_hisi_l3c.retry_cpu", + "ConfigCode": "0x40", + "EventName": "retry_cpu", "BriefDescription": "Count of the number of retry that L3C suppresses the CPU operations", "PublicDescription": "Count of the number of retry that L3C suppresses the CPU operations", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x41", - "EventName": "uncore_hisi_l3c.retry_ring", + "ConfigCode": "0x41", + "EventName": "retry_ring", "BriefDescription": "Count of the number of retry that L3C suppresses the ring operations", "PublicDescription": "Count of the number of retry that L3C suppresses the ring operations", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x42", - "EventName": "uncore_hisi_l3c.prefetch_drop", + "ConfigCode": "0x42", + "EventName": "prefetch_drop", "BriefDescription": "Count of the number of prefetch drops from this L3C", "PublicDescription": "Count of the number of prefetch drops from this L3C", "Unit": "hisi_sccl,l3c" diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index c43591d831b8..31d8b57ca9bb 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -18,6 +18,7 @@ 0x00000000410fd080,v1,arm/cortex-a57-a72,core 0x00000000410fd0b0,v1,arm/cortex-a76-n1,core 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core +0x00000000410fd400,v1,arm/neoverse-v1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core diff --git a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json index 5347350c360c..3e7ac409d894 100644 --- a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json +++ b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json @@ -286,5 +286,5 @@ "EventCode": "0x21e", "EventName": "pop25_inst", "BriefDescription": "V3 POP25 instructions" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json index b6b7f29ca831..86bd8ba9391d 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json @@ -124,5 +124,5 @@ "EventName": "L2C_STORES_SENT", "BriefDescription": "L2C Stores Sent", "PublicDescription": "Incremented by one for every store sent to Level-2 (L1.5) cache" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json index 5da8296b667e..1a5e4f89c57e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json @@ -390,5 +390,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json index 17fb5241928b..fc762e9f1d6e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json @@ -54,5 +54,5 @@ "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json index 89e070727e1b..4942b20a1ea1 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json @@ -369,5 +369,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json index 17fb5241928b..fc762e9f1d6e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json @@ -54,5 +54,5 @@ "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index c998e4f1d1d2..ad79189050a0 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json @@ -26,5 +26,5 @@ "EventName": "ECC_BLOCKED_CYCLES_COUNT", "BriefDescription": "ECC Blocked Cycles Count", "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 24c4ba2a9ae5..8ac61f8f286b 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -397,5 +397,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json index b7b42a870bb0..86b29fd181cf 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json @@ -166,5 +166,5 @@ "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json index 162251037219..f40cbed89418 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json @@ -243,5 +243,5 @@ "EventName": "TX_C_TABORT_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" - }, + } ] diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json index 788766f45dbc..73089c682f80 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json @@ -38,5 +38,5 @@ "BriefDescription": "Total cache hits", "PublicDescription": "Total cache hits", "Unit": "imc" - }, + } ] diff --git a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json index 0f681a6e10ea..c7e7528db315 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json @@ -6,4 +6,11 @@ "Unit": "sys_ddr_pmu", "Compat": "v8" }, + { + "BriefDescription": "ccn read-cycles event", + "ConfigCode": "0x2c", + "EventName": "sys_ccn_pmu.read_cycles", + "Unit": "sys_ccn_pmu", + "Compat": "0x01" + } ] diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 57ddbb9f9b31..14b9a8ab15b9 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -311,5 +311,5 @@ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", "MetricName": "C6_Pkg_Residency" - }, + } ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 6731b3cf0c2f..2e7c4153875b 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -45,6 +45,7 @@ #include <sys/resource.h> /* getrlimit */ #include <ftw.h> #include <sys/stat.h> +#include <linux/compiler.h> #include <linux/list.h> #include "jsmn.h" #include "json.h" @@ -70,7 +71,7 @@ struct json_event { char *metric_constraint; }; -enum aggr_mode_class convert(const char *aggr_mode) +static enum aggr_mode_class convert(const char *aggr_mode) { if (!strcmp(aggr_mode, "PerCore")) return PerCore; @@ -81,8 +82,6 @@ enum aggr_mode_class convert(const char *aggr_mode) return -1; } -typedef int (*func)(void *data, struct json_event *je); - static LIST_HEAD(sys_event_tables); struct sys_event_table { @@ -361,7 +360,7 @@ static int close_table; static void print_events_table_prefix(FILE *fp, const char *tblname) { - fprintf(fp, "struct pmu_event %s[] = {\n", tblname); + fprintf(fp, "static const struct pmu_event %s[] = {\n", tblname); close_table = 1; } @@ -369,7 +368,7 @@ static int print_events_table_entry(void *data, struct json_event *je) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; - char *topic = pd->topic; + char *topic_local = pd->topic; /* * TODO: Remove formatting chars after debugging to reduce @@ -384,7 +383,7 @@ static int print_events_table_entry(void *data, struct json_event *je) fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); if (je->compat) fprintf(outfp, "\t.compat = \"%s\",\n", je->compat); - fprintf(outfp, "\t.topic = \"%s\",\n", topic); + fprintf(outfp, "\t.topic = \"%s\",\n", topic_local); if (je->long_desc && je->long_desc[0]) fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); if (je->pmu) @@ -470,7 +469,7 @@ static void free_arch_std_events(void) } } -static int save_arch_std_events(void *data, struct json_event *je) +static int save_arch_std_events(void *data __maybe_unused, struct json_event *je) { struct event_struct *es; @@ -575,10 +574,12 @@ static int json_events(const char *fn, struct json_event je = {}; char *arch_std = NULL; unsigned long long eventcode = 0; + unsigned long long configcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; jsmntok_t *obj = tok++; + bool configcode_present = false; EXPECT(obj->type == JSMN_OBJECT, obj, "expected object"); for (j = 0; j < obj->size; j += 2) { @@ -601,6 +602,12 @@ static int json_events(const char *fn, addfield(map, &code, "", "", val); eventcode |= strtoul(code, NULL, 0); free(code); + } else if (json_streq(map, field, "ConfigCode")) { + char *code = NULL; + addfield(map, &code, "", "", val); + configcode |= strtoul(code, NULL, 0); + free(code); + configcode_present = true; } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); @@ -682,7 +689,10 @@ static int json_events(const char *fn, addfield(map, &extra_desc, " ", "(Precise event)", NULL); } - snprintf(buf, sizeof buf, "event=%#llx", eventcode); + if (configcode_present) + snprintf(buf, sizeof buf, "config=%#llx", configcode); + else + snprintf(buf, sizeof buf, "event=%#llx", eventcode); addfield(map, &event, ",", buf, NULL); if (je.desc && extra_desc) addfield(map, &je.desc, " ", extra_desc, NULL); @@ -786,7 +796,7 @@ static bool is_sys_dir(char *fname) static void print_mapping_table_prefix(FILE *outfp) { - fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n"); + fprintf(outfp, "const struct pmu_events_map pmu_events_map[] = {\n"); } static void print_mapping_table_suffix(FILE *outfp) @@ -820,7 +830,7 @@ static void print_mapping_test_table(FILE *outfp) static void print_system_event_mapping_table_prefix(FILE *outfp) { - fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {"); + fprintf(outfp, "\nconst struct pmu_sys_events pmu_sys_event_tables[] = {"); } static void print_system_event_mapping_table_suffix(FILE *outfp) @@ -1196,7 +1206,7 @@ int main(int argc, char *argv[]) const char *arch; const char *output_file; const char *start_dirname; - char *err_string_ext = ""; + const char *err_string_ext = ""; struct stat stbuf; prog = basename(argv[0]); @@ -1285,6 +1295,7 @@ int main(int argc, char *argv[]) } free_arch_std_events(); + free_sys_event_tables(); free(mapfile); return 0; @@ -1306,6 +1317,7 @@ err_close_eventsfp: create_empty_mapping(output_file); err_out: free_arch_std_events(); + free_sys_event_tables(); free(mapfile); return ret; } diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c index 11d1fa18bfa5..831dc44c4558 100644 --- a/tools/perf/pmu-events/jsmn.c +++ b/tools/perf/pmu-events/jsmn.c @@ -24,6 +24,7 @@ #include <stdlib.h> #include "jsmn.h" +#define JSMN_STRICT /* * Allocates a fresh unused token from the token pool. @@ -176,6 +177,14 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmnerr_t r; int i; jsmntok_t *token; +#ifdef JSMN_STRICT + /* + * Keeps track of whether a new object/list/primitive is expected. New items are only + * allowed after an opening brace, comma or colon. A closing brace after a comma is not + * valid JSON. + */ + int expecting_item = 1; +#endif for (; parser->pos < len; parser->pos++) { char c; @@ -185,6 +194,10 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, switch (c) { case '{': case '[': +#ifdef JSMN_STRICT + if (!expecting_item) + return JSMN_ERROR_INVAL; +#endif token = jsmn_alloc_token(parser, tokens, num_tokens); if (token == NULL) return JSMN_ERROR_NOMEM; @@ -196,6 +209,10 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, break; case '}': case ']': +#ifdef JSMN_STRICT + if (expecting_item) + return JSMN_ERROR_INVAL; +#endif type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); for (i = parser->toknext - 1; i >= 0; i--) { token = &tokens[i]; @@ -219,6 +236,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, } break; case '\"': +#ifdef JSMN_STRICT + if (!expecting_item) + return JSMN_ERROR_INVAL; + expecting_item = 0; +#endif r = jsmn_parse_string(parser, js, len, tokens, num_tokens); if (r < 0) @@ -229,11 +251,15 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, case '\t': case '\r': case '\n': - case ':': - case ',': case ' ': break; #ifdef JSMN_STRICT + case ':': + case ',': + if (expecting_item) + return JSMN_ERROR_INVAL; + expecting_item = 1; + break; /* * In strict mode primitives are: * numbers and booleans. @@ -253,6 +279,9 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, case 'f': case 'n': #else + case ':': + case ',': + break; /* * In non-strict mode every unquoted value * is a primitive. @@ -260,6 +289,12 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, /*FALL THROUGH */ default: #endif + +#ifdef JSMN_STRICT + if (!expecting_item) + return JSMN_ERROR_INVAL; + expecting_item = 0; +#endif r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); if (r < 0) @@ -282,7 +317,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, return JSMN_ERROR_PART; } +#ifdef JSMN_STRICT + return expecting_item ? JSMN_ERROR_INVAL : JSMN_SUCCESS; +#else return JSMN_SUCCESS; +#endif } /* diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 5c2bf7275c1c..6efe73976440 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -41,19 +41,19 @@ struct pmu_events_map { const char *cpuid; const char *version; const char *type; /* core, uncore etc */ - struct pmu_event *table; + const struct pmu_event *table; }; struct pmu_sys_events { const char *name; - struct pmu_event *table; + const struct pmu_event *table; }; /* * Global table mapping each known CPU for the architecture to its * table of PMU events. */ -extern struct pmu_events_map pmu_events_map[]; -extern struct pmu_sys_events pmu_sys_event_tables[]; +extern const struct pmu_events_map pmu_events_map[]; +extern const struct pmu_sys_events pmu_sys_event_tables[]; #endif diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default index d9e99b3f77e6..d8ea6a88163f 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/attr/test-stat-default @@ -68,3 +68,100 @@ fd=10 type=0 config=5 optional=1 + +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1 index 8b04a055d154..b656ab93c5bf 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/attr/test-stat-detailed-1 @@ -70,12 +70,109 @@ type=0 config=5 optional=1 +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event11:base-stat] -fd=11 +[event20:base-stat] +fd=20 type=3 config=0 optional=1 @@ -84,8 +181,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event12:base-stat] -fd=12 +[event21:base-stat] +fd=21 type=3 config=65536 optional=1 @@ -94,8 +191,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event13:base-stat] -fd=13 +[event22:base-stat] +fd=22 type=3 config=2 optional=1 @@ -104,8 +201,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event14:base-stat] -fd=14 +[event23:base-stat] +fd=23 type=3 config=65538 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2 index 4fca9f1bfbf8..97625090a1c4 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/attr/test-stat-detailed-2 @@ -70,12 +70,109 @@ type=0 config=5 optional=1 +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event11:base-stat] -fd=11 +[event20:base-stat] +fd=20 type=3 config=0 optional=1 @@ -84,8 +181,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event12:base-stat] -fd=12 +[event21:base-stat] +fd=21 type=3 config=65536 optional=1 @@ -94,8 +191,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event13:base-stat] -fd=13 +[event22:base-stat] +fd=22 type=3 config=2 optional=1 @@ -104,8 +201,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event14:base-stat] -fd=14 +[event23:base-stat] +fd=23 type=3 config=65538 optional=1 @@ -114,8 +211,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event15:base-stat] -fd=15 +[event24:base-stat] +fd=24 type=3 config=1 optional=1 @@ -124,8 +221,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event16:base-stat] -fd=16 +[event25:base-stat] +fd=25 type=3 config=65537 optional=1 @@ -134,8 +231,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event17:base-stat] -fd=17 +[event26:base-stat] +fd=26 type=3 config=3 optional=1 @@ -144,8 +241,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event18:base-stat] -fd=18 +[event27:base-stat] +fd=27 type=3 config=65539 optional=1 @@ -154,8 +251,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event19:base-stat] -fd=19 +[event28:base-stat] +fd=28 type=3 config=4 optional=1 @@ -164,8 +261,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event20:base-stat] -fd=20 +[event29:base-stat] +fd=29 type=3 config=65540 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3 index 4bb58e1c82a6..d555042e3fbf 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/attr/test-stat-detailed-3 @@ -70,12 +70,109 @@ type=0 config=5 optional=1 +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event11:base-stat] -fd=11 +[event20:base-stat] +fd=20 type=3 config=0 optional=1 @@ -84,8 +181,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event12:base-stat] -fd=12 +[event21:base-stat] +fd=21 type=3 config=65536 optional=1 @@ -94,8 +191,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event13:base-stat] -fd=13 +[event22:base-stat] +fd=22 type=3 config=2 optional=1 @@ -104,8 +201,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event14:base-stat] -fd=14 +[event23:base-stat] +fd=23 type=3 config=65538 optional=1 @@ -114,8 +211,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event15:base-stat] -fd=15 +[event24:base-stat] +fd=24 type=3 config=1 optional=1 @@ -124,8 +221,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event16:base-stat] -fd=16 +[event25:base-stat] +fd=25 type=3 config=65537 optional=1 @@ -134,8 +231,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event17:base-stat] -fd=17 +[event26:base-stat] +fd=26 type=3 config=3 optional=1 @@ -144,8 +241,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event18:base-stat] -fd=18 +[event27:base-stat] +fd=27 type=3 config=65539 optional=1 @@ -154,8 +251,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event19:base-stat] -fd=19 +[event28:base-stat] +fd=28 type=3 config=4 optional=1 @@ -164,8 +261,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event20:base-stat] -fd=20 +[event29:base-stat] +fd=29 type=3 config=65540 optional=1 @@ -174,8 +271,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event21:base-stat] -fd=21 +[event30:base-stat] +fd=30 type=3 config=512 optional=1 @@ -184,8 +281,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event22:base-stat] -fd=22 +[event31:base-stat] +fd=31 type=3 config=66048 optional=1 diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index b4b9a9488d51..7447a4478991 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -115,12 +115,13 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m goto out_delete_evlist; } - bzero(&parse_error, sizeof(parse_error)); + parse_events_error__init(&parse_error); /* * Set backward bit, ring buffer should be writing from end. Record * it in aux evlist */ err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error); + parse_events_error__exit(&parse_error); if (err) { pr_debug("Failed to parse tracepoint event, try use root\n"); ret = TEST_SKIP; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fa03ff0dc083..2bf146e49ce8 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -123,12 +123,13 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct parse_events_state parse_state; struct parse_events_error parse_error; - bzero(&parse_error, sizeof(parse_error)); + parse_events_error__init(&parse_error); bzero(&parse_state, sizeof(parse_state)); parse_state.error = &parse_error; INIT_LIST_HEAD(&parse_state.list); err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL); + parse_events_error__exit(&parse_error); if (err || list_empty(&parse_state.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 9b4a765e4b73..f439bd49da19 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -606,7 +606,8 @@ static int do_test_code_reading(bool try_kcore) } ret = perf_event__synthesize_thread_map(NULL, threads, - perf_event__process, machine, false); + perf_event__process, machine, + true, false); if (ret < 0) { pr_debug("perf_event__synthesize_thread_map failed\n"); goto out_err; diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 0e46aeb843ce..80cff8a3558c 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -124,17 +124,19 @@ static int expand_group_events(void) evlist = evlist__new(); TEST_ASSERT_VAL("failed to get evlist", evlist); + parse_events_error__init(&err); ret = parse_events(evlist, event_str, &err); if (ret < 0) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", event_str, ret, err.str); - parse_events_print_error(&err, event_str); + parse_events_error__print(&err, event_str); goto out; } rblist__init(&metric_events); ret = test_expand_events(evlist, &metric_events); out: + parse_events_error__exit(&err); evlist__delete(evlist); return ret; } @@ -193,7 +195,7 @@ static int expand_metric_events(void) .metric_name = NULL, }, }; - struct pmu_events_map ev_map = { + const struct pmu_events_map ev_map = { .cpuid = "test", .version = "1", .type = "core", diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 4d01051951cd..077783223ce0 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -1,16 +1,62 @@ // SPDX-License-Identifier: GPL-2.0 #include "util/debug.h" #include "util/expr.h" +#include "util/smt.h" #include "tests.h" #include <stdlib.h> #include <string.h> #include <linux/zalloc.h> +static int test_ids_union(void) +{ + struct hashmap *ids1, *ids2; + + /* Empty union. */ + ids1 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids1); + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 0); + + /* Union {foo, bar} against {}. */ + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("foo")), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("bar")), 0); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); + + /* Union {foo, bar} against {foo}. */ + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("foo")), 0); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); + + /* Union {foo, bar} against {bar,baz}. */ + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("bar")), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("baz")), 0); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 3); + + ids__free(ids1); + + return 0; +} + static int test(struct expr_parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, e, 1)) + if (expr__parse(&val, ctx, e)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -22,67 +68,90 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char *p; double val; int ret; - struct expr_parse_ctx ctx; - - expr__ctx_init(&ctx); - expr__add_id_val(&ctx, strdup("FOO"), 1); - expr__add_id_val(&ctx, strdup("BAR"), 2); - - ret = test(&ctx, "1+1", 2); - ret |= test(&ctx, "FOO+BAR", 3); - ret |= test(&ctx, "(BAR/2)%2", 1); - ret |= test(&ctx, "1 - -4", 5); - ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); - ret |= test(&ctx, "1-1 | 1", 1); - ret |= test(&ctx, "1-1 & 1", 0); - ret |= test(&ctx, "min(1,2) + 1", 2); - ret |= test(&ctx, "max(1,2) + 1", 3); - ret |= test(&ctx, "1+1 if 3*4 else 0", 2); - ret |= test(&ctx, "1.1 + 2.1", 3.2); - ret |= test(&ctx, ".1 + 2.", 2.1); - ret |= test(&ctx, "d_ratio(1, 2)", 0.5); - ret |= test(&ctx, "d_ratio(2.5, 0)", 0); - ret |= test(&ctx, "1.1 < 2.2", 1); - ret |= test(&ctx, "2.2 > 1.1", 1); - ret |= test(&ctx, "1.1 < 1.1", 0); - ret |= test(&ctx, "2.2 > 2.2", 0); - ret |= test(&ctx, "2.2 < 1.1", 0); - ret |= test(&ctx, "1.1 > 2.2", 0); - - if (ret) + struct expr_parse_ctx *ctx; + + TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0); + + ctx = expr__ctx_new(); + TEST_ASSERT_VAL("expr__ctx_new", ctx); + expr__add_id_val(ctx, strdup("FOO"), 1); + expr__add_id_val(ctx, strdup("BAR"), 2); + + ret = test(ctx, "1+1", 2); + ret |= test(ctx, "FOO+BAR", 3); + ret |= test(ctx, "(BAR/2)%2", 1); + ret |= test(ctx, "1 - -4", 5); + ret |= test(ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + ret |= test(ctx, "1-1 | 1", 1); + ret |= test(ctx, "1-1 & 1", 0); + ret |= test(ctx, "min(1,2) + 1", 2); + ret |= test(ctx, "max(1,2) + 1", 3); + ret |= test(ctx, "1+1 if 3*4 else 0", 2); + ret |= test(ctx, "1.1 + 2.1", 3.2); + ret |= test(ctx, ".1 + 2.", 2.1); + ret |= test(ctx, "d_ratio(1, 2)", 0.5); + ret |= test(ctx, "d_ratio(2.5, 0)", 0); + ret |= test(ctx, "1.1 < 2.2", 1); + ret |= test(ctx, "2.2 > 1.1", 1); + ret |= test(ctx, "1.1 < 1.1", 0); + ret |= test(ctx, "2.2 > 2.2", 0); + ret |= test(ctx, "2.2 < 1.1", 0); + ret |= test(ctx, "1.1 > 2.2", 0); + + if (ret) { + expr__ctx_free(ctx); return ret; + } p = "FOO/0"; - ret = expr__parse(&val, &ctx, p, 1); + ret = expr__parse(&val, ctx, p); TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, &ctx, p, 1); + ret = expr__parse(&val, ctx, p); TEST_ASSERT_VAL("missing operand", ret == -1); - expr__ctx_clear(&ctx); - TEST_ASSERT_VAL("find other", - expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", - &ctx, 1) == 0); - TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR", + expr__ctx_clear(ctx); + TEST_ASSERT_VAL("find ids", + expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO", + ctx) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", (void **)&val_ptr)); - expr__ctx_clear(&ctx); - TEST_ASSERT_VAL("find other", - expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", - NULL, &ctx, 3) == 0); - TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/", + expr__ctx_clear(ctx); + ctx->runtime = 3; + TEST_ASSERT_VAL("find ids", + expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", + NULL, ctx) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", (void **)&val_ptr)); - expr__ctx_clear(&ctx); + /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */ + expr__ctx_clear(ctx); + TEST_ASSERT_VAL("find ids", + expr__find_ids("EVENT1 if #smt_on else EVENT2", + NULL, ctx) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, + smt_on() ? "EVENT1" : "EVENT2", + (void **)&val_ptr)); + + /* The expression is a constant 1.0 without needing to evaluate EVENT1. */ + expr__ctx_clear(ctx); + TEST_ASSERT_VAL("find ids", + expr__find_ids("1.0 if EVENT1 > 100.0 else 1.0", + NULL, ctx) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0); + + expr__ctx_free(ctx); return 0; } diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 8d9d4cbff76d..6f2da7a72f67 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -135,7 +135,7 @@ static int synth_all(struct machine *machine) { return perf_event__synthesize_threads(NULL, perf_event__process, - machine, 0, 1); + machine, 1, 0, 1); } static int synth_process(struct machine *machine) @@ -147,7 +147,7 @@ static int synth_process(struct machine *machine) err = perf_event__synthesize_thread_map(NULL, map, perf_event__process, - machine, 0); + machine, 1, 0); perf_thread_map__put(map); return err; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index fd3556cc9ad4..6af94639b14a 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -605,7 +605,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 10); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); /* config1 */ term = list_entry(term->list.next, struct parse_events_term, list); @@ -614,7 +614,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 1); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config1")); /* config2=3 */ term = list_entry(term->list.next, struct parse_events_term, list); @@ -623,7 +623,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 3); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config2")); /* umask=1*/ term = list_entry(term->list.next, struct parse_events_term, list); @@ -661,7 +661,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 0xead); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); return 0; } @@ -2045,7 +2045,6 @@ static int test_event(struct evlist_test *e) struct evlist *evlist; int ret; - bzero(&err, sizeof(err)); if (e->valid && !e->valid()) { pr_debug("... SKIP"); return 0; @@ -2055,15 +2054,16 @@ static int test_event(struct evlist_test *e) if (evlist == NULL) return -ENOMEM; + parse_events_error__init(&err); ret = parse_events(evlist, e->name, &err); if (ret) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", e->name, ret, err.str); - parse_events_print_error(&err, e->name); + parse_events_error__print(&err, e->name); } else { ret = e->check(evlist); } - + parse_events_error__exit(&err); evlist__delete(evlist); return ret; diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 4f6f4904e852..dfc797ecc750 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -79,7 +79,7 @@ static struct pmu_event pme_test[] = { } }; -static struct pmu_events_map map = { +static const struct pmu_events_map map = { .cpuid = "test", .version = "1", .type = "core", diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 43743cf719ef..9ae894c406d8 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -67,7 +67,7 @@ static const struct perf_pmu_test_event segment_reg_loads_any = { .desc = "Number of segment register loads", .topic = "other", }, - .alias_str = "umask=0x80,(null)=0x30d40,event=0x6", + .alias_str = "umask=0x80,period=0x30d40,event=0x6", .alias_long_desc = "Number of segment register loads", }; @@ -78,7 +78,7 @@ static const struct perf_pmu_test_event dispatch_blocked_any = { .desc = "Memory cluster signals to block micro-op dispatch for any reason", .topic = "other", }, - .alias_str = "umask=0x20,(null)=0x30d40,event=0x9", + .alias_str = "umask=0x20,period=0x30d40,event=0x9", .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason", }; @@ -89,7 +89,7 @@ static const struct perf_pmu_test_event eist_trans = { .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", .topic = "other", }, - .alias_str = "umask=0,(null)=0x30d40,event=0x3a", + .alias_str = "umask=0,period=0x30d40,event=0x3a", .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }; @@ -146,7 +146,7 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = { static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = { .event = { .name = "uncore_hisi_l3c.rd_hit_cpipe", - .event = "event=0x2", + .event = "event=0x7", .desc = "Total read hits. Unit: hisi_sccl,l3c ", .topic = "uncore", .long_desc = "Total read hits", @@ -208,8 +208,23 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = { .matching_pmu = "uncore_sys_ddr_pmu", }; +static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = { + .event = { + .name = "sys_ccn_pmu.read_cycles", + .event = "config=0x2c", + .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", + .topic = "uncore", + .pmu = "uncore_sys_ccn_pmu", + .compat = "0x01", + }, + .alias_str = "config=0x2c", + .alias_long_desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", + .matching_pmu = "uncore_sys_ccn_pmu", +}; + static const struct perf_pmu_test_event *sys_events[] = { &sys_ddr_pmu_write_cycles, + &sys_ccn_pmu_read_cycles, NULL }; @@ -227,9 +242,9 @@ static bool is_same(const char *reference, const char *test) return !strcmp(reference, test); } -static struct pmu_events_map *__test_pmu_get_events_map(void) +static const struct pmu_events_map *__test_pmu_get_events_map(void) { - struct pmu_events_map *map; + const struct pmu_events_map *map; for (map = &pmu_events_map[0]; map->cpuid; map++) { if (!strcmp(map->cpuid, "testcpu")) @@ -241,9 +256,9 @@ static struct pmu_events_map *__test_pmu_get_events_map(void) return NULL; } -static struct pmu_event *__test_pmu_get_sys_events_table(void) +static const struct pmu_event *__test_pmu_get_sys_events_table(void) { - struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; + const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; for ( ; tables->name; tables++) { if (!strcmp("pme_test_soc_sys", tables->name)) @@ -253,8 +268,26 @@ static struct pmu_event *__test_pmu_get_sys_events_table(void) return NULL; } -static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) +static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event *e2) { + if (!is_same(e1->name, e2->name)) { + pr_debug2("testing event e1 %s: mismatched name string, %s vs %s\n", + e1->name, e1->name, e2->name); + return -1; + } + + if (!is_same(e1->compat, e2->compat)) { + pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n", + e1->name, e1->compat, e2->compat); + return -1; + } + + if (!is_same(e1->event, e2->event)) { + pr_debug2("testing event e1 %s: mismatched event, %s vs %s\n", + e1->name, e1->event, e2->event); + return -1; + } + if (!is_same(e1->desc, e2->desc)) { pr_debug2("testing event e1 %s: mismatched desc, %s vs %s\n", e1->name, e1->desc, e2->desc); @@ -273,6 +306,12 @@ static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) return -1; } + if (!is_same(e1->pmu, e2->pmu)) { + pr_debug2("testing event e1 %s: mismatched pmu string, %s vs %s\n", + e1->name, e1->pmu, e2->pmu); + return -1; + } + if (!is_same(e1->unit, e2->unit)) { pr_debug2("testing event e1 %s: mismatched unit, %s vs %s\n", e1->name, e1->unit, e2->unit); @@ -285,6 +324,12 @@ static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) return -1; } + if (!is_same(e1->aggr_mode, e2->aggr_mode)) { + pr_debug2("testing event e1 %s: mismatched aggr_mode, %s vs %s\n", + e1->name, e1->aggr_mode, e2->aggr_mode); + return -1; + } + if (!is_same(e1->metric_expr, e2->metric_expr)) { pr_debug2("testing event e1 %s: mismatched metric_expr, %s vs %s\n", e1->name, e1->metric_expr, e2->metric_expr); @@ -297,21 +342,21 @@ static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) return -1; } - if (!is_same(e1->deprecated, e2->deprecated)) { - pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n", - e1->name, e1->deprecated, e2->deprecated); + if (!is_same(e1->metric_group, e2->metric_group)) { + pr_debug2("testing event e1 %s: mismatched metric_group, %s vs %s\n", + e1->name, e1->metric_group, e2->metric_group); return -1; } - if (!is_same(e1->pmu, e2->pmu)) { - pr_debug2("testing event e1 %s: mismatched pmu string, %s vs %s\n", - e1->name, e1->pmu, e2->pmu); + if (!is_same(e1->deprecated, e2->deprecated)) { + pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n", + e1->name, e1->deprecated, e2->deprecated); return -1; } - if (!is_same(e1->compat, e2->compat)) { - pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n", - e1->name, e1->compat, e2->compat); + if (!is_same(e1->metric_constraint, e2->metric_constraint)) { + pr_debug2("testing event e1 %s: mismatched metric_constant, %s vs %s\n", + e1->name, e1->metric_constraint, e2->metric_constraint); return -1; } @@ -375,9 +420,9 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias, /* Verify generated events from pmu-events.c are as expected */ static int test_pmu_event_table(void) { - struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); - struct pmu_events_map *map = __test_pmu_get_events_map(); - struct pmu_event *table; + const struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); + const struct pmu_events_map *map = __test_pmu_get_events_map(); + const struct pmu_event *table; int map_events = 0, expected_events; /* ignore 3x sentinels */ @@ -473,7 +518,7 @@ static int __test_core_pmu_event_aliases(char *pmu_name, int *count) struct perf_pmu *pmu; LIST_HEAD(aliases); int res = 0; - struct pmu_events_map *map = __test_pmu_get_events_map(); + const struct pmu_events_map *map = __test_pmu_get_events_map(); struct perf_pmu_alias *a, *tmp; if (!map) @@ -526,7 +571,7 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu) struct perf_pmu *pmu = &test_pmu->pmu; const char *pmu_name = pmu->name; struct perf_pmu_alias *a, *tmp, *alias; - struct pmu_events_map *map; + const struct pmu_events_map *map; LIST_HEAD(aliases); int res = 0; @@ -647,6 +692,16 @@ static struct perf_pmu_test_pmu test_pmus[] = { &sys_ddr_pmu_write_cycles, }, }, + { + .pmu = { + .name = (char *)"uncore_sys_ccn_pmu4", + .is_uncore = 1, + .id = (char *)"0x01", + }, + .aliases = { + &sys_ccn_pmu_read_cycles, + }, + }, }; /* Test that aliases generated are as expected */ @@ -706,6 +761,7 @@ static int check_parse_id(const char *id, struct parse_events_error *error, { struct evlist *evlist; int ret; + char *dup, *cur; /* Numbers are always valid. */ if (is_number(id)) @@ -714,16 +770,28 @@ static int check_parse_id(const char *id, struct parse_events_error *error, evlist = evlist__new(); if (!evlist) return -ENOMEM; - ret = __parse_events(evlist, id, error, fake_pmu); + + dup = strdup(id); + if (!dup) + return -ENOMEM; + + for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@')) + *cur = '/'; + + ret = __parse_events(evlist, dup, error, fake_pmu); + free(dup); + evlist__delete(evlist); return ret; } -static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe) +static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event *pe) { - struct parse_events_error error = { .idx = 0, }; + struct parse_events_error error; + int ret; - int ret = check_parse_id(id, &error, NULL); + parse_events_error__init(&error); + ret = check_parse_id(id, &error, NULL); if (ret && same_cpu) { pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", pe->metric_name, id, pe->metric_expr); @@ -734,22 +802,18 @@ static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe) id, pe->metric_name, pe->metric_expr); ret = 0; } - free(error.str); - free(error.help); - free(error.first_str); - free(error.first_help); + parse_events_error__exit(&error); return ret; } static int check_parse_fake(const char *id) { - struct parse_events_error error = { .idx = 0, }; - int ret = check_parse_id(id, &error, &perf_pmu__fake); + struct parse_events_error error; + int ret; - free(error.str); - free(error.help); - free(error.first_str); - free(error.first_help); + parse_events_error__init(&error); + ret = check_parse_id(id, &error, &perf_pmu__fake); + parse_events_error__exit(&error); return ret; } @@ -770,7 +834,7 @@ struct metric { static int resolve_metric_simple(struct expr_parse_ctx *pctx, struct list_head *compound_list, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *metric_name) { struct hashmap_entry *cur, *cur_tmp; @@ -781,9 +845,9 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, do { all = true; - hashmap__for_each_entry_safe((&pctx->ids), cur, cur_tmp, bkt) { + hashmap__for_each_entry_safe(pctx->ids, cur, cur_tmp, bkt) { struct metric_ref *ref; - struct pmu_event *pe; + const struct pmu_event *pe; pe = metricgroup__find_metric(cur->key, map); if (!pe) @@ -811,7 +875,7 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, ref->metric_expr = pe->metric_expr; list_add_tail(&metric->list, compound_list); - rc = expr__find_other(pe->metric_expr, NULL, pctx, 0); + rc = expr__find_ids(pe->metric_expr, NULL, pctx); if (rc) goto out_err; break; /* The hashmap has been modified, so restart */ @@ -830,14 +894,19 @@ out_err: static int test_parsing(void) { - struct pmu_events_map *cpus_map = pmu_events_map__find(); - struct pmu_events_map *map; - struct pmu_event *pe; + const struct pmu_events_map *cpus_map = pmu_events_map__find(); + const struct pmu_events_map *map; + const struct pmu_event *pe; int i, j, k; int ret = 0; - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; double result; + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return TEST_FAIL; + } i = 0; for (;;) { map = &pmu_events_map[i++]; @@ -855,15 +924,14 @@ static int test_parsing(void) break; if (!pe->metric_expr) continue; - expr__ctx_init(&ctx); - if (expr__find_other(pe->metric_expr, NULL, &ctx, 0) - < 0) { - expr_failure("Parse other failed", map, pe); + expr__ctx_clear(ctx); + if (expr__find_ids(pe->metric_expr, NULL, ctx) < 0) { + expr_failure("Parse find ids failed", map, pe); ret++; continue; } - if (resolve_metric_simple(&ctx, &compound_list, map, + if (resolve_metric_simple(ctx, &compound_list, map, pe->metric_name)) { expr_failure("Could not resolve metrics", map, pe); ret++; @@ -876,27 +944,27 @@ static int test_parsing(void) * make them unique. */ k = 1; - hashmap__for_each_entry((&ctx.ids), cur, bkt) - expr__add_id_val(&ctx, strdup(cur->key), k++); + hashmap__for_each_entry(ctx->ids, cur, bkt) + expr__add_id_val(ctx, strdup(cur->key), k++); - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { if (check_parse_cpu(cur->key, map == cpus_map, pe)) ret++; } list_for_each_entry_safe(metric, tmp, &compound_list, list) { - expr__add_ref(&ctx, &metric->metric_ref); + expr__add_ref(ctx, &metric->metric_ref); free(metric); } - if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { + if (expr__parse(&result, ctx, pe->metric_expr)) { expr_failure("Parse failed", map, pe); ret++; } - expr__ctx_clear(&ctx); } } + expr__ctx_free(ctx); /* TODO: fail when not ok */ exit: return ret == 0 ? TEST_OK : TEST_SKIP; @@ -916,7 +984,7 @@ static struct test_metric metrics[] = { static int metric_parse_fake(const char *str) { - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; struct hashmap_entry *cur; double result; int ret = -1; @@ -925,9 +993,13 @@ static int metric_parse_fake(const char *str) pr_debug("parsing '%s'\n", str); - expr__ctx_init(&ctx); - if (expr__find_other(str, NULL, &ctx, 0) < 0) { - pr_err("expr__find_other failed\n"); + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return TEST_FAIL; + } + if (expr__find_ids(str, NULL, ctx) < 0) { + pr_err("expr__find_ids failed\n"); return -1; } @@ -937,23 +1009,23 @@ static int metric_parse_fake(const char *str) * make them unique. */ i = 1; - hashmap__for_each_entry((&ctx.ids), cur, bkt) - expr__add_id_val(&ctx, strdup(cur->key), i++); + hashmap__for_each_entry(ctx->ids, cur, bkt) + expr__add_id_val(ctx, strdup(cur->key), i++); - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { if (check_parse_fake(cur->key)) { pr_err("check_parse_fake failed\n"); goto out; } } - if (expr__parse(&result, &ctx, str, 0)) + if (expr__parse(&result, ctx, str)) pr_err("expr__parse failed\n"); else ret = 0; out: - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); return ret; } @@ -964,8 +1036,8 @@ out: */ static int test_parsing_fake(void) { - struct pmu_events_map *map; - struct pmu_event *pe; + const struct pmu_events_map *map; + const struct pmu_event *pe; unsigned int i, j; int err = 0; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 8fd8a4ef97da..c83a11514129 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -13,6 +13,7 @@ #include "evsel.h" #include "debug.h" #include "util/synthetic-events.h" +#include "util/trace-event.h" #include "tests.h" @@ -30,9 +31,18 @@ } \ } while (0) +/* + * Hardcode the expected values for branch_entry flags. + * These are based on the input value (213) specified + * in branch_stack variable. + */ +#define BS_EXPECTED_BE 0xa00d000000000000 +#define BS_EXPECTED_LE 0xd5000000 +#define FLAG(s) s->branch_stack->entries[i].flags + static bool samples_same(const struct perf_sample *s1, const struct perf_sample *s2, - u64 type, u64 read_format) + u64 type, u64 read_format, bool needs_swap) { size_t i; @@ -100,8 +110,14 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); COMP(branch_stack->hw_idx); - for (i = 0; i < s1->branch_stack->nr; i++) - MCOMP(branch_stack->entries[i]); + for (i = 0; i < s1->branch_stack->nr; i++) { + if (needs_swap) + return ((tep_is_bigendian()) ? + (FLAG(s2).value == BS_EXPECTED_BE) : + (FLAG(s2).value == BS_EXPECTED_LE)); + else + MCOMP(branch_stack->entries[i]); + } } if (type & PERF_SAMPLE_REGS_USER) { @@ -248,7 +264,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) }, }; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; - struct perf_sample sample_out; + struct perf_sample sample_out, sample_out_endian; size_t i, sz, bufsz; int err, ret = -1; @@ -313,12 +329,29 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) goto out_free; } - if (!samples_same(&sample, &sample_out, sample_type, read_format)) { + if (!samples_same(&sample, &sample_out, sample_type, read_format, evsel.needs_swap)) { pr_debug("parsing failed for sample_type %#"PRIx64"\n", sample_type); goto out_free; } + if (sample_type == PERF_SAMPLE_BRANCH_STACK) { + evsel.needs_swap = true; + evsel.sample_size = __evsel__sample_size(sample_type); + err = evsel__parse_sample(&evsel, event, &sample_out_endian); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "evsel__parse_sample", sample_type, err); + goto out_free; + } + + if (!samples_same(&sample, &sample_out_endian, sample_type, read_format, evsel.needs_swap)) { + pr_debug("parsing failed for sample_type %#"PRIx64"\n", + sample_type); + goto out_free; + } + } + ret = 0; out_free: free(event); diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index bf9e729b3ecf..8d9c04e450ae 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -15,9 +15,6 @@ skip_if_no_perf_probe || exit 2 . $(dirname $0)/lib/probe_vfs_getname.sh -perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -file=$(mktemp /tmp/temporary_file.XXXXX) - record_open_file() { echo "Recording open file:" perf record -o ${perfdata} -e probe:vfs_getname\* touch $file @@ -35,6 +32,9 @@ if [ $err -ne 0 ] ; then exit $err fi +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + record_open_file && perf_script_filenames err=$? rm -f ${perfdata} diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh new file mode 100755 index 000000000000..de24d374ce24 --- /dev/null +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# perf all metricgroups test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +for m in $(perf list --raw-dump metricgroups); do + echo "Testing $m" + perf stat -M "$m" true +done + +exit 0 diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh new file mode 100755 index 000000000000..7f4ba3cad632 --- /dev/null +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# perf all metrics test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +for m in $(perf list --raw-dump metrics); do + echo "Testing $m" + result=$(perf stat -M "$m" true 2>&1) + if [[ ! "$result" =~ "$m" ]] && [[ ! "$result" =~ "<not supported>" ]]; then + # We failed to see the metric and the events are support. Possibly the + # workload was too small so retry with something longer. + result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) + if [[ ! "$result" =~ "$m" ]]; then + echo "Metric '$m' not printed in:" + echo "$result" + exit 1 + fi + fi +done + +exit 0 diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh new file mode 100755 index 000000000000..2de7fd0394fd --- /dev/null +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# perf all PMU test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +for p in $(perf list --raw-dump pmu); do + echo "Testing $p" + result=$(perf stat -e "$p" true 2>&1) + if [[ ! "$result" =~ "$p" ]] && [[ ! "$result" =~ "<not supported>" ]]; then + # We failed to see the event and it is supported. Possibly the workload was + # too small so retry with something longer. + result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) + if [[ ! "$result" =~ "$p" ]]; then + echo "Event '$p' not printed in:" + echo "$result" + exit 1 + fi + fi +done + +exit 0 diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index c9eef0bba6f1..6de53b7ef5ff 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -9,8 +9,6 @@ # SPDX-License-Identifier: GPL-2.0 # Leo Yan <leo.yan@linaro.org>, 2020 -perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -file=$(mktemp /tmp/temporary_file.XXXXX) glb_err=0 skip_if_no_cs_etm_event() { @@ -22,13 +20,20 @@ skip_if_no_cs_etm_event() { skip_if_no_cs_etm_event || exit 2 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + cleanup_files() { rm -f ${perfdata} rm -f ${file} + rm -f "${perfdata}.old" + trap - exit term int + kill -2 $$ + exit $glb_err } -trap cleanup_files exit +trap cleanup_files exit term int record_touch_file() { echo "Recording trace (only user mode) with path: CPU$2 => $1" diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 3d31c1d560d6..3d60e993d2b8 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,8 +17,6 @@ skip_if_no_perf_trace || exit 2 . $(dirname $0)/lib/probe_vfs_getname.sh -file=$(mktemp /tmp/temporary_file.XXXXX) - trace_open_vfs_getname() { evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ @@ -32,6 +30,8 @@ if [ $err -ne 0 ] ; then exit $err fi +file=$(mktemp /tmp/temporary_file.XXXXX) + # Do not use whatever ~/.perfconfig file, it may change the output # via trace.{show_timestamp,show_prefix,etc} export PERF_CONFIG=/dev/null diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index b9028e304ddd..4574c46260d9 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -49,7 +49,9 @@ static int session_write_header(char *path) session->evlist = evlist__new(); TEST_ASSERT_VAL("can't get evlist", session->evlist); + parse_events_error__init(&err); parse_events(session->evlist, "cpu_core/cycles/", &err); + parse_events_error__exit(&err); } perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 193b7c91b4e2..4f884aabc7f4 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -3,6 +3,7 @@ #include <linux/rbtree.h> #include <inttypes.h> #include <string.h> +#include <ctype.h> #include <stdlib.h> #include "dso.h" #include "map.h" @@ -14,6 +15,102 @@ #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) +static bool is_ignored_symbol(const char *name, char type) +{ + /* Symbol names that exactly match to the following are ignored.*/ + static const char * const ignored_symbols[] = { + /* + * Symbols which vary between passes. Passes 1 and 2 must have + * identical symbol lists. The kallsyms_* symbols below are + * only added after pass 1, they would be included in pass 2 + * when --all-symbols is specified so exclude them to get a + * stable symbol list. + */ + "kallsyms_addresses", + "kallsyms_offsets", + "kallsyms_relative_base", + "kallsyms_num_syms", + "kallsyms_names", + "kallsyms_markers", + "kallsyms_token_table", + "kallsyms_token_index", + /* Exclude linker generated symbols which vary between passes */ + "_SDA_BASE_", /* ppc */ + "_SDA2_BASE_", /* ppc */ + NULL + }; + + /* Symbol names that begin with the following are ignored.*/ + static const char * const ignored_prefixes[] = { + "$", /* local symbols for ARM, MIPS, etc. */ + ".LASANPC", /* s390 kasan local symbols */ + "__crc_", /* modversions */ + "__efistub_", /* arm64 EFI stub namespace */ + "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ + "__AArch64ADRPThunk_", /* arm64 lld */ + "__ARMV5PILongThunk_", /* arm lld */ + "__ARMV7PILongThunk_", + "__ThumbV7PILongThunk_", + "__LA25Thunk_", /* mips lld */ + "__microLA25Thunk_", + NULL + }; + + /* Symbol names that end with the following are ignored.*/ + static const char * const ignored_suffixes[] = { + "_from_arm", /* arm */ + "_from_thumb", /* arm */ + "_veneer", /* arm */ + NULL + }; + + /* Symbol names that contain the following are ignored.*/ + static const char * const ignored_matches[] = { + ".long_branch.", /* ppc stub */ + ".plt_branch.", /* ppc stub */ + NULL + }; + + const char * const *p; + + for (p = ignored_symbols; *p; p++) + if (!strcmp(name, *p)) + return true; + + for (p = ignored_prefixes; *p; p++) + if (!strncmp(name, *p, strlen(*p))) + return true; + + for (p = ignored_suffixes; *p; p++) { + int l = strlen(name) - strlen(*p); + + if (l >= 0 && !strcmp(name + l, *p)) + return true; + } + + for (p = ignored_matches; *p; p++) { + if (strstr(name, *p)) + return true; + } + + if (type == 'U' || type == 'u') + return true; + /* exclude debugging symbols */ + if (type == 'N' || type == 'n') + return true; + + if (toupper(type) == 'A') { + /* Keep these useful absolute symbols */ + if (strcmp(name, "__kernel_syscall_via_break") && + strcmp(name, "__kernel_syscall_via_epc") && + strcmp(name, "__kernel_sigtramp") && + strcmp(name, "__gp")) + return true; + } + + return false; +} + int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; @@ -169,6 +266,11 @@ next_pair: * such as __indirect_thunk_end. */ continue; + } else if (is_ignored_symbol(sym->name, sym->type)) { + /* + * Ignore hidden symbols, see scripts/kallsyms.c for the details + */ + continue; } else { pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 041d6032a348..8ef26d89ef49 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -364,6 +364,8 @@ struct ucred { #define SOL_KCM 281 #define SOL_TLS 282 #define SOL_XDP 283 +#define SOL_MPTCP 284 +#define SOL_MCTP 285 /* IPX options */ #define IPX_TYPE 1 diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f2914d5bed6e..2e5bfbb69960 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -138,6 +138,7 @@ perf-y += expr.o perf-y += branch.o perf-y += mem2node.o perf-y += clockid.o +perf-y += list_sort.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o @@ -201,6 +202,7 @@ endif perf-y += perf-hooks.o perf-$(CONFIG_LIBBPF) += bpf-event.o +perf-$(CONFIG_LIBBPF) += bpf-utils.o perf-$(CONFIG_CXX) += c++/ @@ -315,3 +317,7 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0bae061b2d6d..8511af55fc3a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -28,6 +28,7 @@ #include "evsel.h" #include "evlist.h" #include "bpf-event.h" +#include "bpf-utils.h" #include "block-range.h" #include "string2.h" #include "util/event.h" @@ -151,6 +152,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" +#include "arch/riscv64/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" #include "arch/sparc/annotate/instructions.c" @@ -183,7 +185,6 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), - .ins_is_fused = x86__ins_is_fused, .objdump = { .comment_char = '#', }, @@ -193,6 +194,10 @@ static struct arch architectures[] = { .init = powerpc__annotate_init, }, { + .name = "riscv64", + .init = riscv64__annotate_init, + }, + { .name = "s390", .init = s390__annotate_init, .objdump = { @@ -1700,12 +1705,12 @@ static int symbol__disassemble_bpf(struct symbol *sym, { struct annotation *notes = symbol__annotation(sym); struct annotation_options *opts = args->options; - struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; disassembler_ftype disassemble; struct map *map = args->ms.map; + struct perf_bpil *info_linear; struct disassemble_info info; struct dso *dso = map->dso; int pc = 0, count, sub_id; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 2e5eff4f8f03..2f311189c6e8 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,7 +13,7 @@ #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 8d2865b9ade2..c679394b898d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1564,6 +1564,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'q': synth_opts->quick += 1; break; + case 'A': + synth_opts->approx_ipc = true; + break; case 'Z': synth_opts->timeless_decoding = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 5f383908ca6e..bbf0d78c6401 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -59,6 +59,7 @@ enum itrace_period_type { #define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) #define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) +#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a')) /** * struct itrace_synth_opts - AUX area tracing synthesis options. @@ -84,6 +85,7 @@ enum itrace_period_type { * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events * @add_last_branch: add branch context to existing event records + * @approx_ipc: approximate IPC * @flc: whether to synthesize first level cache events * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events @@ -127,6 +129,7 @@ struct itrace_synth_opts { bool thread_stack; bool last_branch; bool add_last_branch; + bool approx_ipc; bool flc; bool llc; bool tlb; @@ -639,6 +642,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " d[flags]: create a debug log\n" \ " each flag must be preceded by + or -\n" \ " log flags are: a (all perf events)\n" \ +" o (output to stdout)\n" \ " f: synthesize first level cache events\n" \ " m: synthesize last level cache events\n" \ " t: synthesize TLB events\n" \ @@ -649,6 +653,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " L[len]: synthesize last branch entries on existing event records\n" \ " sNUMBER: skip initial number of events\n" \ " q: quicker (less detailed) decoding\n" \ +" A: approximate IPC\n" \ +" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 1a7112a87736..4d3b4cdce176 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -10,6 +10,7 @@ #include <internal/lib.h> #include <symbol/kallsyms.h> #include "bpf-event.h" +#include "bpf-utils.h" #include "debug.h" #include "dso.h" #include "symbol.h" @@ -32,8 +33,6 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? ERR_PTR(err) : btf; } -#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) - static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) { int ret = 0; @@ -48,9 +47,9 @@ static int machine__process_bpf_event_load(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = machine->env; + struct perf_bpil *info_linear; int id = event->bpf.id; unsigned int i; @@ -110,7 +109,7 @@ static int perf_env__fetch_btf(struct perf_env *env, u32 data_size; const void *data; - data = btf__get_raw_data(btf, &data_size); + data = btf__raw_data(btf, &data_size); node = malloc(data_size + sizeof(struct btf_node)); if (!node) @@ -175,9 +174,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, { struct perf_record_ksymbol *ksymbol_event = &event->ksymbol; struct perf_record_bpf_event *bpf_event = &event->bpf; - struct bpf_prog_info_linear *info_linear; struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -191,15 +190,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ env = session->data ? &session->header.env : &perf_env; - arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + arrays = 1UL << PERF_BPIL_JITED_KSYMS; + arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; + arrays |= 1UL << PERF_BPIL_FUNC_INFO; + arrays |= 1UL << PERF_BPIL_PROG_TAGS; + arrays |= 1UL << PERF_BPIL_JITED_INSNS; + arrays |= 1UL << PERF_BPIL_LINE_INFO; + arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; - info_linear = bpf_program__get_prog_info_linear(fd, arrays); + info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { info_linear = NULL; pr_debug("%s: failed to get BPF program info. aborting\n", __func__); @@ -452,8 +451,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, static void perf_env__add_bpf_info(struct perf_env *env, u32 id) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; @@ -463,15 +462,15 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (fd < 0) return; - arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + arrays = 1UL << PERF_BPIL_JITED_KSYMS; + arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; + arrays |= 1UL << PERF_BPIL_FUNC_INFO; + arrays |= 1UL << PERF_BPIL_PROG_TAGS; + arrays |= 1UL << PERF_BPIL_JITED_INSNS; + arrays |= 1UL << PERF_BPIL_LINE_INFO; + arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; - info_linear = bpf_program__get_prog_info_linear(fd, arrays); + info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. aborting\n", __func__); goto out; @@ -576,7 +575,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", info->id, name, prog_addrs[0], prog_lens[0]); - return; + goto out; } fprintf(fp, "# bpf_prog_info %u:\n", info->id); @@ -586,4 +585,6 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", i, name, prog_addrs[i], prog_lens[i]); } +out: + btf__free(btf); } diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 68f315c3df5b..144a8a24cc69 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -19,7 +19,7 @@ struct evlist; struct target; struct bpf_prog_info_node { - struct bpf_prog_info_linear *info_linear; + struct perf_bpil *info_linear; struct rb_node rb_node; }; diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c new file mode 100644 index 000000000000..e271e05e51bc --- /dev/null +++ b/tools/perf/util/bpf-utils.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <errno.h> +#include <stdlib.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <bpf/bpf.h> +#include "bpf-utils.h" +#include "debug.h" + +struct bpil_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpil_array_desc bpil_array_desc[] = { + [PERF_BPIL_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [PERF_BPIL_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [PERF_BPIL_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [PERF_BPIL_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [PERF_BPIL_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [PERF_BPIL_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, func_info_rec_size), + }, + [PERF_BPIL_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [PERF_BPIL_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [PERF_BPIL_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, + int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, + int offset) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct perf_bpil * +get_bpf_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info info = {}; + struct perf_bpil *info_linear; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> PERF_BPIL_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + bool include_array = (arrays & (1UL << i)) > 0; + struct bpil_array_desc *desc; + __u32 count, size; + + desc = bpil_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < (__u32)desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct perf_bpil) + data_len); + if (!info_linear) + return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u32 v1, v2; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warning("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warning("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpil_addr_to_offs(struct perf_bpil *info_linear) +{ + int i; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpil_offs_to_addr(struct perf_bpil *info_linear) +{ + int i; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h new file mode 100644 index 000000000000..86a5055cdfad --- /dev/null +++ b/tools/perf/util/bpf-utils.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __PERF_BPF_UTILS_H +#define __PERF_BPF_UTILS_H + +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +#ifdef HAVE_LIBBPF_SUPPORT + +#include <bpf/libbpf.h> + +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing perf_bpil to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpil_addr_to_offs() and bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. + * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << PERF_BPIL_MAP_IDS) | + * (1UL << PERF_BPIL_PROG_TAGS); + * struct perf_bpil *info_linear = + * get_bpf_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, <proper_size>); + * bpil_offs_to_addr(info_linear); + */ +enum perf_bpil_array_types { + PERF_BPIL_FIRST_ARRAY = 0, + PERF_BPIL_JITED_INSNS = 0, + PERF_BPIL_XLATED_INSNS, + PERF_BPIL_MAP_IDS, + PERF_BPIL_JITED_KSYMS, + PERF_BPIL_JITED_FUNC_LENS, + PERF_BPIL_FUNC_INFO, + PERF_BPIL_LINE_INFO, + PERF_BPIL_JITED_LINE_INFO, + PERF_BPIL_PROG_TAGS, + PERF_BPIL_LAST_ARRAY, +}; + +struct perf_bpil { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +struct perf_bpil * +get_bpf_prog_info_linear(int fd, __u64 arrays); + +void +bpil_addr_to_offs(struct perf_bpil *info_linear); + +void +bpil_offs_to_addr(struct perf_bpil *info_linear); + +#endif /* HAVE_LIBBPF_SUPPORT */ +#endif /* __PERF_BPF_UTILS_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ba0f20853651..c17d4a43ce06 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -13,6 +13,7 @@ #include <perf/bpf_perf.h> #include "bpf_counter.h" +#include "bpf-utils.h" #include "counts.h" #include "debug.h" #include "evsel.h" @@ -61,14 +62,13 @@ static int bpf_program_profiler__destroy(struct evsel *evsel) static char *bpf_target_prog_name(int tgt_fd) { - struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; + struct perf_bpil *info_linear; const struct btf_type *t; struct btf *btf = NULL; char *name = NULL; - info_linear = bpf_program__get_prog_info_linear( - tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + info_linear = get_bpf_prog_info_linear(tgt_fd, 1UL << PERF_BPIL_FUNC_INFO); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); return NULL; @@ -127,9 +127,9 @@ static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) skel->rodata->num_cpu = evsel__nr_cpus(evsel); - bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); - bpf_map__resize(skel->maps.fentry_readings, 1); - bpf_map__resize(skel->maps.accum_readings, 1); + bpf_map__set_max_entries(skel->maps.events, evsel__nr_cpus(evsel)); + bpf_map__set_max_entries(skel->maps.fentry_readings, 1); + bpf_map__set_max_entries(skel->maps.accum_readings, 1); prog_name = bpf_target_prog_name(prog_fd); if (!prog_name) { @@ -399,7 +399,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, return -1; } - bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); + bpf_map__set_max_entries(skel->maps.events, libbpf_num_possible_cpus()); err = bperf_leader_bpf__load(skel); if (err) { pr_err("Failed to load leader skeleton\n"); diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 89aa5e71db1a..cbc6c2bca488 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -65,14 +65,14 @@ static int bperf_load_program(struct evlist *evlist) /* we need one copy of events per cpu for reading */ map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__resize(skel->maps.events, map_size); - bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups); + bpf_map__set_max_entries(skel->maps.events, map_size); + bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); /* previous result is saved in a per-cpu array */ map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__resize(skel->maps.prev_readings, map_size); + bpf_map__set_max_entries(skel->maps.prev_readings, map_size); /* cgroup result needs all events (per-cpu) */ map_size = evlist->core.nr_entries; - bpf_map__resize(skel->maps.cgrp_readings, map_size); + bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); set_max_rlimit(); diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index c8885dfa3667..df7b18fb6b6e 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -43,8 +43,6 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-cc1", "-triple", "bpf-pc-linux", "-fsyntax-only", - "-ferror-limit", "19", - "-fmessage-length", "127", "-O2", "-nostdsysteminc", "-nobuiltininc", @@ -55,7 +53,11 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-x", "c"}; CCArgs.append(CFlags.begin(), CFlags.end()); - CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs); + CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs +#if CLANG_VERSION_MAJOR >= 11 + ,/*BinaryName=*/nullptr +#endif + ); FrontendOptions& Opts = CI->getFrontendOpts(); Opts.Inputs.clear(); @@ -151,13 +153,16 @@ getBPFObjectFromModule(llvm::Module *Module) legacy::PassManager PM; bool NotAdded; -#if CLANG_VERSION_MAJOR < 7 - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile); + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream +#if CLANG_VERSION_MAJOR >= 7 + , /*DwoOut=*/nullptr +#endif +#if CLANG_VERSION_MAJOR < 10 + , TargetMachine::CGFT_ObjectFile #else - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, - TargetMachine::CGFT_ObjectFile); + , llvm::CGFT_ObjectFile #endif + ); if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index aa862a26d95c..8f7705bbc2da 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1437,7 +1437,7 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); #else bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2c06abf6dcd2..c7a9fa0ffae9 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -24,6 +24,16 @@ #include "util/parse-sublevel-options.h" #include <linux/ctype.h> +#include <traceevent/event-parse.h> + +#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) +#ifndef LIBTRACEEVENT_VERSION +/* + * If LIBTRACEEVENT_VERSION wasn't computed then set to version 1.1.0 that ships + * with the Linux kernel tools. + */ +#define LIBTRACEEVENT_VERSION MAKE_LIBTRACEEVENT_VERSION(1, 1, 0) +#endif int verbose; int debug_peo_args; @@ -228,6 +238,15 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose; +#if MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) <= LIBTRACEEVENT_VERSION + if (verbose == 1) + tep_set_loglevel(TEP_LOG_INFO); + else if (verbose == 2) + tep_set_loglevel(TEP_LOG_DEBUG); + else if (verbose >= 3) + tep_set_loglevel(TEP_LOG_ALL); +#endif + return 0; } diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 9ed9a5676d35..9cc8a1772b4b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -14,6 +14,7 @@ #ifdef HAVE_LIBBPF_SUPPORT #include <bpf/libbpf.h> #include "bpf-event.h" +#include "bpf-utils.h" #endif #include "compress.h" #include "env.h" diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 83723ba11dc8..011da3924fc1 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -193,7 +193,7 @@ struct dso { int fd; int status; u32 status_seen; - size_t file_size; + u64 file_size; struct list_head open_entry; u64 debug_frame_offset; u64 eh_frame_hdr_offset; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cf773f0dec38..17f1dd0680b4 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -16,6 +16,7 @@ struct perf_env perf_env; #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" +#include "bpf-utils.h" #include <bpf/libbpf.h> void perf_env__insert_bpf_prog_info(struct perf_env *env, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ac706304afe9..fe24801f8e9f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -57,6 +57,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", + [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -237,6 +238,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, return machine__process_itrace_start_event(machine, event); } +int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_aux_output_hw_id_event(machine, event); +} + int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -407,6 +416,12 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " hw_id: %#"PRI_lx64"\n", + event->aux_output_hw_id.hw_id); +} + size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; @@ -534,6 +549,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_TEXT_POKE: ret += perf_event__fprintf_text_poke(event, machine, fp); break; + case PERF_RECORD_AUX_OUTPUT_HW_ID: + ret += perf_event__fprintf_aux_output_hw_id(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 19ad64f2bd83..95ffed66369c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -330,6 +330,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_aux_output_hw_id(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -397,6 +401,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbfeceb2546c..ec967fb8d7d9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -294,7 +294,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) +struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config) { struct perf_event_attr attr = { .type = type, @@ -305,18 +305,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) event_attr_init(&attr); - if (!precise) - goto new_event; - /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. */ -new_event: evsel = evsel__new(&attr); if (evsel == NULL) goto out; + arch_evsel__fixup_new_cycles(&evsel->core.attr); + evsel->precise_max = true; /* use asprintf() because free(evsel) assumes name is allocated */ @@ -410,6 +408,11 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->filter == NULL) goto out_err; } + if (orig->metric_id) { + evsel->metric_id = strdup(orig->metric_id); + if (evsel->metric_id == NULL) + goto out_err; + } evsel->cgrp = cgroup__get(orig->cgrp); evsel->tp_format = orig->tp_format; evsel->handler = orig->handler; @@ -779,6 +782,17 @@ out_unknown: return "unknown"; } +const char *evsel__metric_id(const struct evsel *evsel) +{ + if (evsel->metric_id) + return evsel->metric_id; + + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event) + return "duration_time"; + + return "unknown"; +} + const char *evsel__group_name(struct evsel *evsel) { return evsel->group_name ?: "anon group"; @@ -1047,6 +1061,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) evsel__set_sample_bit(evsel, WEIGHT); } +void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused) +{ +} + /* * The enable_on_exec/disabled value strategy: * @@ -1423,6 +1441,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; @@ -1807,7 +1826,7 @@ static void evsel__disable_missing_features(struct evsel *evsel) evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) evsel->core.attr.mmap2 = 0; - if (perf_missing_features.exclude_guest) + if (evsel->pmu && evsel->pmu->missing_features.exclude_guest) evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0; if (perf_missing_features.lbr_flags) evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | @@ -1900,10 +1919,27 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if (!perf_missing_features.exclude_guest && - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && + (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { + if (evsel->pmu == NULL) { + evsel->pmu = evsel__find_pmu(evsel); + if (evsel->pmu) + evsel->pmu->missing_features.exclude_guest = true; + else { + /* we cannot find PMU, disable attrs now */ + evsel->core.attr.exclude_host = false; + evsel->core.attr.exclude_guest = false; + } + } + + if (evsel->exclude_GH) { + pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); + return false; + } + if (!perf_missing_features.exclude_guest) { + perf_missing_features.exclude_guest = true; + pr_debug2_peo("switching off exclude_guest, exclude_host\n"); + } return true; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; @@ -2221,6 +2257,54 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data, data->weight = *array; } +u64 evsel__bitfield_swap_branch_flags(u64 value) +{ + u64 new_val = 0; + + /* + * branch_flags + * union { + * u64 values; + * struct { + * mispred:1 //target mispredicted + * predicted:1 //target predicted + * in_tx:1 //in transaction + * abort:1 //transaction abort + * cycles:16 //cycle count to last branch + * type:4 //branch type + * reserved:40 + * } + * } + * + * Avoid bswap64() the entire branch_flag.value, + * as it has variable bit-field sizes. Instead the + * macro takes the bit-field position/size, + * swaps it based on the host endianness. + * + * tep_is_bigendian() is used here instead of + * bigendian() to avoid python test fails. + */ + if (tep_is_bigendian()) { + new_val = bitfield_swap(value, 0, 1); + new_val |= bitfield_swap(value, 1, 1); + new_val |= bitfield_swap(value, 2, 1); + new_val |= bitfield_swap(value, 3, 1); + new_val |= bitfield_swap(value, 4, 16); + new_val |= bitfield_swap(value, 20, 4); + new_val |= bitfield_swap(value, 24, 40); + } else { + new_val = bitfield_swap(value, 63, 1); + new_val |= bitfield_swap(value, 62, 1); + new_val |= bitfield_swap(value, 61, 1); + new_val |= bitfield_swap(value, 60, 1); + new_val |= bitfield_swap(value, 44, 16); + new_val |= bitfield_swap(value, 40, 4); + new_val |= bitfield_swap(value, 0, 40); + } + + return new_val; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2408,6 +2492,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_BRANCH_STACK) { const u64 max_branch_nr = UINT64_MAX / sizeof(struct branch_entry); + struct branch_entry *e; + unsigned int i; OVERFLOW_CHECK_u64(array); data->branch_stack = (struct branch_stack *)array++; @@ -2416,10 +2502,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, return -EFAULT; sz = data->branch_stack->nr * sizeof(struct branch_entry); - if (evsel__has_branch_hw_idx(evsel)) + if (evsel__has_branch_hw_idx(evsel)) { sz += sizeof(u64); - else + e = &data->branch_stack->entries[0]; + } else { data->no_hw_idx = true; + /* + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied, + * only nr and entries[] will be output by kernel. + */ + e = (struct branch_entry *)&data->branch_stack->hw_idx; + } + + if (swapped) { + /* + * struct branch_flag does not have endian + * specific bit field definition. And bswap + * will not resolve the issue, since these + * are bit fields. + * + * evsel__bitfield_swap_branch_flags() uses a + * bitfield_swap macro to swap the bit position + * based on the host endians. + */ + for (i = 0; i < data->branch_stack->nr; i++, e++) + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value); + } + OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1f7edfa8568a..3ea687141afa 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -22,6 +22,7 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; +struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -68,6 +69,7 @@ struct evsel { double scale; const char *unit; struct cgroup *cgrp; + const char *metric_id; enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; @@ -152,6 +154,9 @@ struct evsel { }; unsigned long open_flags; int precise_ip_original; + + /* for missing_features */ + struct perf_pmu *pmu; }; struct perf_missing_features { @@ -261,6 +266,7 @@ bool evsel__match_bpf_counter_events(const char *name); int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *evsel__name(struct evsel *evsel); +const char *evsel__metric_id(const struct evsel *evsel); const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); @@ -277,6 +283,7 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); @@ -482,4 +489,17 @@ struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); void evsel__set_leader(struct evsel *evsel, struct evsel *leader); + +/* + * Macro to swap the bit-field postition and size. + * Used when, + * - dont need to swap the entire u64 && + * - when u64 has variable bit-field sizes && + * - when presented in a host endian which is different + * than the source endian of the perf.data file + */ +#define bitfield_swap(src, pos, size) \ + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1))) + +u64 evsel__bitfield_swap_branch_flags(u64 value); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index bfedd7b23521..8c2ea8001329 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -11,6 +11,7 @@ #include "strlist.h" #include "symbol.h" #include "srcline.h" +#include "dso.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) { @@ -144,12 +145,17 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_arrow && !first) printed += fprintf(fp, " <-"); - if (print_ip) - printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); - if (map) addr = map->map_ip(map, node->ip); + if (print_ip) { + /* Show binary offset for userspace addr */ + if (map && !map->dso->kernel) + printed += fprintf(fp, "%c%16" PRIx64, s, addr); + else + printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); + } + if (print_sym) { printed += fprintf(fp, " "); node_al.addr = addr; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index a850fd0be3ee..77c6ad81a923 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -25,7 +25,6 @@ struct expr_id_data { const char *metric_name; const char *metric_expr; } ref; - struct expr_id *parent; }; enum { @@ -35,8 +34,6 @@ struct expr_id_data { EXPR_ID_DATA__REF, /* A reference but the value has been computed. */ EXPR_ID_DATA__REF_VALUE, - /* A parent is remembered for the recursion check. */ - EXPR_ID_DATA__PARENT, } kind; }; @@ -59,21 +56,34 @@ static bool key_equal(const void *key1, const void *key2, return !strcmp((const char *)key1, (const char *)key2); } -/* Caller must make sure id is allocated */ -int expr__add_id(struct expr_parse_ctx *ctx, const char *id) +struct hashmap *ids__new(void) +{ + return hashmap__new(key_hash, key_equal, NULL); +} + +void ids__free(struct hashmap *ids) +{ + struct hashmap_entry *cur; + size_t bkt; + + if (ids == NULL) + return; + + hashmap__for_each_entry(ids, cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + + hashmap__free(ids); +} + +int ids__insert(struct hashmap *ids, const char *id) { struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; int ret; - data_ptr = malloc(sizeof(*data_ptr)); - if (!data_ptr) - return -ENOMEM; - - data_ptr->parent = ctx->parent; - data_ptr->kind = EXPR_ID_DATA__PARENT; - - ret = hashmap__set(&ctx->ids, id, data_ptr, + ret = hashmap__set(ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -82,6 +92,48 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) return ret; } +struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) +{ + size_t bkt; + struct hashmap_entry *cur; + int ret; + struct expr_id_data *old_data = NULL; + char *old_key = NULL; + + if (!ids1) + return ids2; + + if (!ids2) + return ids1; + + if (hashmap__size(ids1) < hashmap__size(ids2)) { + struct hashmap *tmp = ids1; + + ids1 = ids2; + ids2 = tmp; + } + hashmap__for_each_entry(ids2, cur, bkt) { + ret = hashmap__set(ids1, cur->key, cur->value, + (const void **)&old_key, (void **)&old_data); + free(old_key); + free(old_data); + + if (ret) { + hashmap__free(ids1); + hashmap__free(ids2); + return NULL; + } + } + hashmap__free(ids2); + return ids1; +} + +/* Caller must make sure id is allocated */ +int expr__add_id(struct expr_parse_ctx *ctx, const char *id) +{ + return ids__insert(ctx->ids, id); +} + /* Caller must make sure id is allocated */ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) { @@ -95,7 +147,7 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) data_ptr->val = val; data_ptr->kind = EXPR_ID_DATA__VALUE; - ret = hashmap__set(&ctx->ids, id, data_ptr, + ret = hashmap__set(ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -140,7 +192,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) data_ptr->ref.metric_expr = ref->metric_expr; data_ptr->kind = EXPR_ID_DATA__REF; - ret = hashmap__set(&ctx->ids, name, data_ptr, + ret = hashmap__set(ctx->ids, name, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -156,9 +208,24 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { - return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1; + return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1; +} + +bool expr__subset_of_ids(struct expr_parse_ctx *haystack, + struct expr_parse_ctx *needles) +{ + struct hashmap_entry *cur; + size_t bkt; + struct expr_id_data *data; + + hashmap__for_each_entry(needles->ids, cur, bkt) { + if (expr__get_id(haystack, cur->key, &data)) + return false; + } + return true; } + int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap) { @@ -175,15 +242,12 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, case EXPR_ID_DATA__VALUE: pr_debug2("lookup(%s): val %f\n", id, data->val); break; - case EXPR_ID_DATA__PARENT: - pr_debug2("lookup(%s): parent %s\n", id, data->parent->id); - break; case EXPR_ID_DATA__REF: pr_debug2("lookup(%s): ref metric name %s\n", id, data->ref.metric_name); pr_debug("processing metric: %s ENTRY\n", id); data->kind = EXPR_ID_DATA__REF_VALUE; - if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) { + if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) { pr_debug("%s failed to count\n", id); return -1; } @@ -205,15 +269,24 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id) struct expr_id_data *old_val = NULL; char *old_key = NULL; - hashmap__delete(&ctx->ids, id, + hashmap__delete(ctx->ids, id, (const void **)&old_key, (void **)&old_val); free(old_key); free(old_val); } -void expr__ctx_init(struct expr_parse_ctx *ctx) +struct expr_parse_ctx *expr__ctx_new(void) { - hashmap__init(&ctx->ids, key_hash, key_equal, NULL); + struct expr_parse_ctx *ctx; + + ctx = malloc(sizeof(struct expr_parse_ctx)); + if (!ctx) + return NULL; + + ctx->ids = hashmap__new(key_hash, key_equal, NULL); + ctx->runtime = 0; + + return ctx; } void expr__ctx_clear(struct expr_parse_ctx *ctx) @@ -221,20 +294,32 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx) struct hashmap_entry *cur; size_t bkt; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + hashmap__clear(ctx->ids); +} + +void expr__ctx_free(struct expr_parse_ctx *ctx) +{ + struct hashmap_entry *cur; + size_t bkt; + + hashmap__for_each_entry(ctx->ids, cur, bkt) { free((char *)cur->key); free(cur->value); } - hashmap__clear(&ctx->ids); + hashmap__free(ctx->ids); + free(ctx); } static int __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, - int start, int runtime) + bool compute_ids) { struct expr_scanner_ctx scanner_ctx = { - .start_token = start, - .runtime = runtime, + .runtime = ctx->runtime, }; YY_BUFFER_STATE buffer; void *scanner; @@ -253,7 +338,7 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, expr_set_debug(1, scanner); #endif - ret = expr_parse(val, ctx, scanner); + ret = expr_parse(val, ctx, compute_ids, scanner); expr__flush_buffer(buffer, scanner); expr__delete_buffer(buffer, scanner); @@ -262,15 +347,15 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, } int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr, int runtime) + const char *expr) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? -1 : 0; } -int expr__find_other(const char *expr, const char *one, - struct expr_parse_ctx *ctx, int runtime) +int expr__find_ids(const char *expr, const char *one, + struct expr_parse_ctx *ctx) { - int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); + int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true); if (one) expr__del_id(ctx, one); @@ -285,9 +370,3 @@ double expr_id_data__value(const struct expr_id_data *data) assert(data->kind == EXPR_ID_DATA__REF_VALUE); return data->ref.val; } - -struct expr_id *expr_id_data__parent(struct expr_id_data *data) -{ - assert(data->kind == EXPR_ID_DATA__PARENT); - return data->parent; -} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 85df3e4771e4..cf81f9166dbb 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -13,39 +13,47 @@ struct metric_ref; -struct expr_id { - char *id; - struct expr_id *parent; -}; - struct expr_parse_ctx { - struct hashmap ids; - struct expr_id *parent; + struct hashmap *ids; + int runtime; }; struct expr_id_data; struct expr_scanner_ctx { - int start_token; int runtime; }; -void expr__ctx_init(struct expr_parse_ctx *ctx); +struct hashmap *ids__new(void); +void ids__free(struct hashmap *ids); +int ids__insert(struct hashmap *ids, const char *id); +/* + * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and + * ids2. + */ +struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2); + +struct expr_parse_ctx *expr__ctx_new(void); void expr__ctx_clear(struct expr_parse_ctx *ctx); +void expr__ctx_free(struct expr_parse_ctx *ctx); + void expr__del_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data); +bool expr__subset_of_ids(struct expr_parse_ctx *haystack, + struct expr_parse_ctx *needles); int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); + int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr, int runtime); -int expr__find_other(const char *expr, const char *one, - struct expr_parse_ctx *ids, int runtime); + const char *expr); + +int expr__find_ids(const char *expr, const char *one, + struct expr_parse_ctx *ids); double expr_id_data__value(const struct expr_id_data *data); -struct expr_id *expr_id_data__parent(struct expr_id_data *data); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 13e5e3c75f56..bd20f33418ba 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -41,11 +41,9 @@ static char *normalize(char *str, int runtime) char *dst = str; while (*str) { - if (*str == '@') - *dst++ = '/'; - else if (*str == '\\') + if (*str == '\\') *dst++ = *++str; - else if (*str == '?') { + else if (*str == '?') { char *paramval; int i = 0; int size = asprintf(¶mval, "%d", runtime); @@ -91,15 +89,6 @@ symbol ({spec}|{sym})+ %% struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); - { - int start_token = sctx->start_token; - - if (sctx->start_token) { - sctx->start_token = 0; - return start_token; - } - } - d_ratio { return D_RATIO; } max { return MAX; } min { return MIN; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index b2ada8f8309a..f969dfa525bd 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,42 +1,43 @@ /* Simple expression parser */ %{ #define YYDEBUG 1 -#include <stdio.h> -#include "util.h" +#include <assert.h> +#include <math.h> #include "util/debug.h" -#include <stdlib.h> // strtod() +#include "smt.h" #define IN_EXPR_Y 1 #include "expr.h" -#include "smt.h" -#include <string.h> - -static double d_ratio(double val0, double val1) -{ - if (val1 == 0) { - return 0; - } - return val0 / val1; -} - %} %define api.pure full %parse-param { double *final_val } %parse-param { struct expr_parse_ctx *ctx } +%parse-param { bool compute_ids } %parse-param {void *scanner} %lex-param {void* scanner} %union { double num; char *str; + struct ids { + /* + * When creating ids, holds the working set of event ids. NULL + * implies the set is empty. + */ + struct hashmap *ids; + /* + * The metric value. When not creating ids this is the value + * read from a counter, a constant or some computed value. When + * creating ids the value is either a constant or BOTTOM. NAN is + * used as the special BOTTOM value, representing a "set of all + * values" case. + */ + double val; + } ids; } -%token EXPR_PARSE EXPR_OTHER EXPR_ERROR -%token <num> NUMBER -%token <str> ID -%destructor { free ($$); } <str> -%token MIN MAX IF ELSE SMT_ON D_RATIO +%token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR %left MIN MAX IF %left '|' %left '^' @@ -45,83 +46,245 @@ static double d_ratio(double val0, double val1) %left '-' '+' %left '*' '/' '%' %left NEG NOT -%type <num> expr if_expr +%type <num> NUMBER +%type <str> ID +%destructor { free ($$); } <str> +%type <ids> expr if_expr +%destructor { ids__free($$.ids); } <ids> %{ static void expr_error(double *final_val __maybe_unused, struct expr_parse_ctx *ctx __maybe_unused, + bool compute_ids __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } +/* + * During compute ids, the special "bottom" value uses NAN to represent the set + * of all values. NAN is selected as it isn't a useful constant value. + */ +#define BOTTOM NAN + +/* During computing ids, does val represent a constant (non-BOTTOM) value? */ +static bool is_const(double val) +{ + return isfinite(val); +} + +static struct ids union_expr(struct ids ids1, struct ids ids2) +{ + struct ids result = { + .val = BOTTOM, + .ids = ids__union(ids1.ids, ids2.ids), + }; + return result; +} + +/* + * If we're not computing ids or $1 and $3 are constants, compute the new + * constant value using OP. Its invariant that there are no ids. If computing + * ids for non-constants union the set of IDs that must be computed. + */ +#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ + if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ + assert(LHS.ids == NULL); \ + assert(RHS.ids == NULL); \ + RESULT.val = (long)LHS.val OP (long)RHS.val; \ + RESULT.ids = NULL; \ + } else { \ + RESULT = union_expr(LHS, RHS); \ + } + +#define BINARY_OP(RESULT, OP, LHS, RHS) \ + if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ + assert(LHS.ids == NULL); \ + assert(RHS.ids == NULL); \ + RESULT.val = LHS.val OP RHS.val; \ + RESULT.ids = NULL; \ + } else { \ + RESULT = union_expr(LHS, RHS); \ + } + %} %% -start: -EXPR_PARSE all_expr -| -EXPR_OTHER all_other +start: if_expr +{ + if (compute_ids) + ctx->ids = ids__union($1.ids, ctx->ids); -all_other: all_other other -| + if (final_val) + *final_val = $1.val; +} +; + +if_expr: expr IF expr ELSE expr +{ + if (fpclassify($3.val) == FP_ZERO) { + /* + * The IF expression evaluated to 0 so treat as false, take the + * ELSE and discard everything else. + */ + $$.val = $5.val; + $$.ids = $5.ids; + ids__free($1.ids); + ids__free($3.ids); + } else if (!compute_ids || is_const($3.val)) { + /* + * If ids aren't computed then treat the expression as true. If + * ids are being computed and the IF expr is a non-zero + * constant, then also evaluate the true case. + */ + $$.val = $1.val; + $$.ids = $1.ids; + ids__free($3.ids); + ids__free($5.ids); + } else if ($1.val == $5.val) { + /* + * LHS == RHS, so both are an identical constant. No need to + * evaluate any events. + */ + $$.val = $1.val; + $$.ids = NULL; + ids__free($1.ids); + ids__free($3.ids); + ids__free($5.ids); + } else { + /* + * Value is either the LHS or RHS and we need the IF expression + * to compute it. + */ + $$ = union_expr($1, union_expr($3, $5)); + } +} +| expr +; -other: ID +expr: NUMBER { - expr__add_id(ctx, $1); -} -| -MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' -| -'<' | '>' | D_RATIO - -all_expr: if_expr { *final_val = $1; } - ; - -if_expr: - expr IF expr ELSE expr { $$ = $3 ? $1 : $5; } - | expr - ; - -expr: NUMBER - | ID { - struct expr_id_data *data; - - if (expr__resolve_id(ctx, $1, &data)) { - free($1); - YYABORT; - } - - $$ = expr_id_data__value(data); - free($1); - } - | expr '|' expr { $$ = (long)$1 | (long)$3; } - | expr '&' expr { $$ = (long)$1 & (long)$3; } - | expr '^' expr { $$ = (long)$1 ^ (long)$3; } - | expr '<' expr { $$ = $1 < $3; } - | expr '>' expr { $$ = $1 > $3; } - | expr '+' expr { $$ = $1 + $3; } - | expr '-' expr { $$ = $1 - $3; } - | expr '*' expr { $$ = $1 * $3; } - | expr '/' expr { if ($3 == 0) { - pr_debug("division by zero\n"); - YYABORT; - } - $$ = $1 / $3; - } - | expr '%' expr { if ((long)$3 == 0) { - pr_debug("division by zero\n"); - YYABORT; - } - $$ = (long)$1 % (long)$3; - } - | '-' expr %prec NEG { $$ = -$2; } - | '(' if_expr ')' { $$ = $2; } - | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } - | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; } - | SMT_ON { $$ = smt_on() > 0; } - | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); } - ; + $$.val = $1; + $$.ids = NULL; +} +| ID +{ + if (!compute_ids) { + /* + * Compute the event's value from ID. If the ID isn't known then + * it isn't used to compute the formula so set to NAN. + */ + struct expr_id_data *data; + + $$.val = NAN; + if (expr__resolve_id(ctx, $1, &data) == 0) + $$.val = expr_id_data__value(data); + + $$.ids = NULL; + free($1); + } else { + /* + * Set the value to BOTTOM to show that any value is possible + * when the event is computed. Create a set of just the ID. + */ + $$.val = BOTTOM; + $$.ids = ids__new(); + if (!$$.ids || ids__insert($$.ids, $1)) + YYABORT; + } +} +| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } +| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } +| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } +| expr '<' expr { BINARY_OP($$, <, $1, $3); } +| expr '>' expr { BINARY_OP($$, >, $1, $3); } +| expr '+' expr { BINARY_OP($$, +, $1, $3); } +| expr '-' expr { BINARY_OP($$, -, $1, $3); } +| expr '*' expr { BINARY_OP($$, *, $1, $3); } +| expr '/' expr +{ + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.val = $1.val / $3.val; + $$.ids = NULL; + } else { + /* LHS and/or RHS need computing from event IDs so union. */ + $$ = union_expr($1, $3); + } +} +| expr '%' expr +{ + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { + assert($1.ids == NULL); + assert($3.ids == NULL); + $$.val = (long)$1.val % (long)$3.val; + $$.ids = NULL; + } else { + /* LHS and/or RHS need computing from event IDs so union. */ + $$ = union_expr($1, $3); + } +} +| D_RATIO '(' expr ',' expr ')' +{ + if (fpclassify($5.val) == FP_ZERO) { + /* + * Division by constant zero always yields zero and no events + * are necessary. + */ + assert($5.ids == NULL); + $$.val = 0.0; + $$.ids = NULL; + ids__free($3.ids); + } else if (!compute_ids || (is_const($3.val) && is_const($5.val))) { + assert($3.ids == NULL); + assert($5.ids == NULL); + $$.val = $3.val / $5.val; + $$.ids = NULL; + } else { + /* LHS and/or RHS need computing from event IDs so union. */ + $$ = union_expr($3, $5); + } +} +| '-' expr %prec NEG +{ + $$.val = -$2.val; + $$.ids = $2.ids; +} +| '(' if_expr ')' +{ + $$ = $2; +} +| MIN '(' expr ',' expr ')' +{ + if (!compute_ids) { + $$.val = $3.val < $5.val ? $3.val : $5.val; + $$.ids = NULL; + } else { + $$ = union_expr($3, $5); + } +} +| MAX '(' expr ',' expr ')' +{ + if (!compute_ids) { + $$.val = $3.val > $5.val ? $3.val : $5.val; + $$.ids = NULL; + } else { + $$ = union_expr($3, $5); + } +} +| SMT_ON +{ + $$.val = smt_on() > 0 ? 1.0 : 0.0; + $$.ids = NULL; +} +; %% diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index d4137559be05..3db3293213a9 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -42,7 +42,7 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #error "unsupported architecture" #endif -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define GEN_ELF_ENDIAN ELFDATA2MSB #else #define GEN_ELF_ENDIAN ELFDATA2LSB diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1c7414f66655..56511db8fa03 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -48,6 +48,7 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "bpf-utils.h" #include "clockid.h" #include "pmu-hybrid.h" @@ -1006,17 +1007,17 @@ static int write_bpf_prog_info(struct feat_fd *ff, node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - len = sizeof(struct bpf_prog_info_linear) + + len = sizeof(struct perf_bpil) + node->info_linear->data_len; /* before writing to file, translate address to offset */ - bpf_program__bpil_addr_to_offs(node->info_linear); + bpil_addr_to_offs(node->info_linear); ret = do_write(ff, node->info_linear, len); /* * translate back to address even when do_write() fails, * so that this function never changes the data. */ - bpf_program__bpil_offs_to_addr(node->info_linear); + bpil_offs_to_addr(node->info_linear); if (ret < 0) goto out; } @@ -3018,9 +3019,9 @@ static int process_dir_format(struct feat_fd *ff, #ifdef HAVE_LIBBPF_SUPPORT static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = &ff->ph->env; + struct perf_bpil *info_linear; u32 count, i; int err = -1; @@ -3049,7 +3050,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; } - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + + info_linear = malloc(sizeof(struct perf_bpil) + data_len); if (!info_linear) goto out; @@ -3071,7 +3072,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; /* after reading from file, translate offset to address */ - bpf_program__bpil_offs_to_addr(info_linear); + bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; perf_env__insert_bpf_prog_info(env, info_node); } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index af1e78d76228..2c8147a62203 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -35,7 +35,7 @@ #define INTEL_BTS_ERR_NOINSN 5 #define INTEL_BTS_ERR_LOST 9 -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le64_to_cpu bswap_64 #else #define le64_to_cpu diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index bc629359826f..b41c2e9c6f88 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -18,3 +18,5 @@ CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder ifeq ($(CC_NO_CLANG), 1) CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init endif + +CFLAGS_intel-pt-insn-decoder.o += -Wno-packed diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5ab631702769..5f83937bf8f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -608,6 +608,7 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) { decoder->sample_timestamp = decoder->timestamp; decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + decoder->state.cycles = decoder->tot_cyc_cnt; } static void intel_pt_reposition(struct intel_pt_decoder *decoder) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 4b5e79fcf557..8fd68f7a0963 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -218,6 +218,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t tot_insn_cnt; uint64_t tot_cyc_cnt; + uint64_t cycles; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 593f20e9774c..9d5e65cec89b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -143,7 +143,7 @@ static void intel_pt_insn_decoder(struct insn *insn, if (branch == INTEL_PT_BR_CONDITIONAL || branch == INTEL_PT_BR_UNCONDITIONAL) { -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ switch (insn->immediate.nbytes) { case 1: intel_pt_insn->rel = insn->immediate.value; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 09feb5b07d32..5f5dfc8753f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -82,10 +82,10 @@ static int intel_pt_log_open(void) if (f) return 0; - if (!log_name[0]) - return -1; - - f = fopen(log_name, "w+"); + if (log_name[0]) + f = fopen(log_name, "w+"); + else + f = stdout; if (!f) { intel_pt_enable_logging = false; return -1; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 02a3395d6ce3..4bd154848cad 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -16,7 +16,7 @@ #define BIT63 ((uint64_t)1 << 63) -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6f852b305e92..556a893508da 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -111,6 +111,7 @@ struct intel_pt { u64 cbr_id; u64 psb_id; + bool single_pebs; bool sample_pebs; struct evsel *pebs_evsel; @@ -148,6 +149,14 @@ enum switch_state { INTEL_PT_SS_EXPECTING_SWITCH_IP, }; +/* applicable_counters is 64-bits */ +#define INTEL_PT_MAX_PEBS 64 + +struct intel_pt_pebs_event { + struct evsel *evsel; + u64 id; +}; + struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; @@ -163,6 +172,7 @@ struct intel_pt_queue { bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; + bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; @@ -189,6 +199,7 @@ struct intel_pt_queue { u64 last_br_cyc_cnt; unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; + struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -1571,7 +1582,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; @@ -1622,7 +1633,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; @@ -1978,15 +1989,13 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample = { .ip = 0, }; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; - struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->core.id[0]; u8 cpumode; u64 regs[8 * sizeof(sample.intr_regs.mask)]; @@ -2112,6 +2121,45 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } +static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; + u64 id = evsel->core.id[0]; + + return intel_pt_do_synth_pebs_sample(ptq, evsel, id); +} + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct intel_pt_pebs_event *pe; + struct intel_pt *pt = ptq->pt; + int err = -EINVAL; + int hw_id; + + if (!items->has_applicable_counters || !items->applicable_counters) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no applicable_counters\n"); + return intel_pt_synth_single_pebs_sample(ptq); + } + + for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { + pe = &ptq->pebs[hw_id]; + if (!pe->evsel) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", + hw_id); + return intel_pt_synth_single_pebs_sample(ptq); + } + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + if (err) + return err; + } + + return err; +} + static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp) { @@ -2198,8 +2246,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + if (pt->synth_opts.approx_ipc) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->cycles; + ptq->sample_ipc = true; + } else { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; + } /* * Do PEBS first to allow for the possibility that the PEBS timestamp @@ -2882,6 +2937,30 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + u64 hw_id = event->aux_output_hw_id.hw_id; + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + struct evsel *evsel; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); + if (!queue || !queue->priv || !evsel || hw_id > INTEL_PT_MAX_PEBS) { + pr_err("Bad AUX output hardware ID\n"); + return -EINVAL; + } + + ptq = queue->priv; + + ptq->pebs[hw_id].evsel = evsel; + ptq->pebs[hw_id].id = sample->id; + + return 0; +} + static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -3009,6 +3088,8 @@ static int intel_pt_process_event(struct perf_session *session, err = intel_pt_process_switch(pt, sample); else if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) + err = intel_pt_process_aux_output_hw_id(pt, event, sample); else if (event->header.type == PERF_RECORD_SWITCH || event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); @@ -3393,9 +3474,13 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (evsel->core.attr.aux_output && evsel->core.id) { + if (pt->single_pebs) { + pt->single_pebs = false; + return; + } + pt->single_pebs = true; pt->sample_pebs = true; pt->pebs_evsel = evsel; - return; } } } @@ -3651,8 +3736,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_free; - intel_pt_log_set_name(INTEL_PT_PMU_NAME); - if (session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { @@ -3667,6 +3750,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->synth_opts.thread_stack = opts->thread_stack; } + if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ pt->auxtrace_type = auxtrace_info->type; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 44e40bad0e33..fb8496df8432 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -755,6 +755,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } +int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_aux_output_hw_id(event, stdout); + return 0; +} + int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { @@ -2028,6 +2036,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_bpf(machine, event, sample); break; case PERF_RECORD_TEXT_POKE: ret = machine__process_text_poke(machine, event, sample); break; + case PERF_RECORD_AUX_OUTPUT_HW_ID: + ret = machine__process_aux_output_hw_id_event(machine, event); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 7377ed6efdf1..a143087eeb47 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -124,6 +124,8 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); +int machine__process_aux_output_hw_id_event(struct machine *machine, + union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_namespaces_event(struct machine *machine, diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index f0e75df72b80..3167b4628b6d 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = { [PERF_MEM_LVLNUM_NA] = "N/A", }; +static const char * const mem_hops[] = { + "N/A", + /* + * While printing, 'Remote' will be added to represent + * 'Remote core, same node' accesses as remote field need + * to be set with mem_hops field. + */ + "core, same node", +}; + int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; @@ -320,12 +330,14 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) /* already taken care of */ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - if (mem_info && mem_info->data_src.mem_remote) { strcat(out, "Remote "); l += 7; } + if (mem_info && mem_info->data_src.mem_hops) + l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); + printed = 0; for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { if (!(m & 0x1)) @@ -472,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) /* * Skylake might report unknown remote level via this * bit, consider it when evaluating remote HITMs. + * + * Incase of power, remote field can also be used to denote cache + * accesses from the another core of same node. Hence, setting + * mrem only when HOPS is zero along with set remote field. */ - bool mrem = data_src->mem_remote; + bool mrem = (data_src->mem_remote && !data_src->mem_hops); int err = 0; #define HITM_INC(__f) \ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 29b747ac31c1..fffe02aae3ed 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -11,6 +11,7 @@ #include "evsel.h" #include "strbuf.h" #include "pmu.h" +#include "pmu-hybrid.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -18,6 +19,7 @@ #include "strlist.h" #include <assert.h> #include <linux/ctype.h> +#include <linux/list_sort.h> #include <linux/string.h> #include <linux/zalloc.h> #include <subcmd/parse-options.h> @@ -84,6 +86,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, struct metric_expr *expr, *tmp; list_for_each_entry_safe(expr, tmp, &me->head, nd) { + free((char *)expr->metric_name); free(expr->metric_refs); free(expr->metric_events); free(expr); @@ -116,289 +119,207 @@ struct metric_ref_node { struct list_head list; }; +/** + * The metric under construction. The data held here will be placed in a + * metric_expr. + */ struct metric { struct list_head nd; - struct expr_parse_ctx pctx; + /** + * The expression parse context importantly holding the IDs contained + * within the expression. + */ + struct expr_parse_ctx *pctx; + /** The name of the metric such as "IPC". */ const char *metric_name; + /** Modifier on the metric such as "u" or NULL for none. */ + const char *modifier; + /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; + /** + * The "ScaleUnit" that scales and adds a unit to the metric during + * output. + */ const char *metric_unit; - struct list_head metric_refs; - int metric_refs_cnt; - int runtime; + /** Optional null terminated array of referenced metrics. */ + struct metric_ref *metric_refs; + /** + * Is there a constraint on the group of events? In which case the + * events won't be grouped. + */ bool has_constraint; + /** + * Parsed events for the metric. Optional as events may be taken from a + * different metric whose group contains all the IDs necessary for this + * one. + */ + struct evlist *evlist; }; -#define RECURSION_ID_MAX 1000 +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; -struct expr_ids { - struct expr_id id[RECURSION_ID_MAX]; - int cnt; -}; + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } -static struct expr_id *expr_ids__alloc(struct expr_ids *ids) + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(const struct pmu_event *pe) { - if (ids->cnt >= RECURSION_ID_MAX) - return NULL; - return &ids->id[ids->cnt++]; + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; } -static void expr_ids__exit(struct expr_ids *ids) +static struct metric *metric__new(const struct pmu_event *pe, + const char *modifier, + bool metric_no_group, + int runtime) { - int i; + struct metric *m; + + m = zalloc(sizeof(*m)); + if (!m) + return NULL; + + m->pctx = expr__ctx_new(); + if (!m->pctx) { + free(m); + return NULL; + } - for (i = 0; i < ids->cnt; i++) - free(ids->id[i].id); + m->metric_name = pe->metric_name; + m->modifier = modifier ? strdup(modifier) : NULL; + if (modifier && !m->modifier) { + free(m); + expr__ctx_free(m->pctx); + return NULL; + } + m->metric_expr = pe->metric_expr; + m->metric_unit = pe->unit; + m->pctx->runtime = runtime; + m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + m->metric_refs = NULL; + m->evlist = NULL; + + return m; } -static bool contains_event(struct evsel **metric_events, int num_events, - const char *event_name) +static void metric__free(struct metric *m) +{ + free(m->metric_refs); + expr__ctx_free(m->pctx); + free((char *)m->modifier); + evlist__delete(m->evlist); + free(m); +} + +static bool contains_metric_id(struct evsel **metric_events, int num_events, + const char *metric_id) { int i; for (i = 0; i < num_events; i++) { - if (!strcmp(metric_events[i]->name, event_name)) + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) return true; } return false; } -static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2) -{ - if (!ev1->pmu_name || !ev2->pmu_name) - return true; - - return !strcmp(ev1->pmu_name, ev2->pmu_name); -} - /** - * Find a group of events in perf_evlist that correspond to those from a parsed - * metric expression. Note, as find_evsel_group is called in the same order as - * perf_evlist was constructed, metric_no_merge doesn't need to test for - * underfilling a group. - * @perf_evlist: a list of events something like: {metric1 leader, metric1 - * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling, - * metric2 sibling}:W,duration_time - * @pctx: the parse context for the metric expression. - * @metric_no_merge: don't attempt to share events for the metric with other - * metrics. - * @has_constraint: is there a constraint on the group of events? In which case - * the events won't be grouped. - * @metric_events: out argument, null terminated array of evsel's associated - * with the metric. - * @evlist_used: in/out argument, bitmap tracking which evlist events are used. - * @return the first metric event or NULL on failure. + * setup_metric_events - Find a group of events in metric_evlist that correspond + * to the IDs from a parsed metric expression. + * @ids: the metric IDs to match. + * @metric_evlist: the list of perf events. + * @out_metric_events: holds the created metric events array. */ -static struct evsel *find_evsel_group(struct evlist *perf_evlist, - struct expr_parse_ctx *pctx, - bool metric_no_merge, - bool has_constraint, - struct evsel **metric_events, - unsigned long *evlist_used) +static int setup_metric_events(struct hashmap *ids, + struct evlist *metric_evlist, + struct evsel ***out_metric_events) { - struct evsel *ev, *current_leader = NULL; - struct expr_id_data *val_ptr; - int i = 0, matched_events = 0, events_to_match; - const int idnum = (int)hashmap__size(&pctx->ids); + struct evsel **metric_events; + const char *metric_id; + struct evsel *ev; + size_t ids_size, matched_events, i; - /* - * duration_time is always grouped separately, when events are grouped - * (ie has_constraint is false) then ignore it in the matching loop and - * add it to metric_events at the end. - */ - if (!has_constraint && - hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) - events_to_match = idnum - 1; - else - events_to_match = idnum; + *out_metric_events = NULL; + ids_size = hashmap__size(ids); + + metric_events = calloc(sizeof(void *), ids_size + 1); + if (!metric_events) + return -ENOMEM; + + matched_events = 0; + evlist__for_each_entry(metric_evlist, ev) { + struct expr_id_data *val_ptr; - evlist__for_each_entry (perf_evlist, ev) { /* - * Events with a constraint aren't grouped and match the first - * events available. + * Check for duplicate events with the same name. For + * example, uncore_imc/cas_count_read/ will turn into 6 + * events per socket on skylakex. Only the first such + * event is placed in metric_events. */ - if (has_constraint && ev->weak_group) - continue; - /* Ignore event if already used and merging is disabled. */ - if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) + metric_id = evsel__metric_id(ev); + if (contains_metric_id(metric_events, matched_events, metric_id)) continue; - if (!has_constraint && !evsel__has_leader(ev, current_leader)) { - /* - * Start of a new group, discard the whole match and - * start again. - */ - matched_events = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - current_leader = evsel__leader(ev); - } /* - * Check for duplicate events with the same name. For example, - * uncore_imc/cas_count_read/ will turn into 6 events per socket - * on skylakex. Only the first such event is placed in - * metric_events. If events aren't grouped then this also - * ensures that the same event in different sibling groups - * aren't both added to metric_events. + * Does this event belong to the parse context? For + * combined or shared groups, this metric may not care + * about this event. */ - if (contains_event(metric_events, matched_events, ev->name)) - continue; - /* Does this event belong to the parse context? */ - if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) + if (hashmap__find(ids, metric_id, (void **)&val_ptr)) { metric_events[matched_events++] = ev; - if (matched_events == events_to_match) - break; - } - - if (events_to_match != idnum) { - /* Add the first duration_time. */ - evlist__for_each_entry(perf_evlist, ev) { - if (!strcmp(ev->name, "duration_time")) { - metric_events[matched_events++] = ev; + if (matched_events >= ids_size) break; - } } } - - if (matched_events != idnum) { - /* Not a whole match */ - return NULL; + if (matched_events < ids_size) { + free(metric_events); + return -EINVAL; } - - metric_events[idnum] = NULL; - - for (i = 0; i < idnum; i++) { + for (i = 0; i < ids_size; i++) { ev = metric_events[i]; - /* Don't free the used events. */ - set_bit(ev->core.idx, evlist_used); + ev->collect_stat = true; + /* - * The metric leader points to the identically named event in - * metric_events. + * The metric leader points to the identically named + * event in metric_events. */ ev->metric_leader = ev; /* - * Mark two events with identical names in the same group (or - * globally) as being in use as uncore events may be duplicated - * for each pmu. Set the metric leader of such events to be the - * event that appears in metric_events. + * Mark two events with identical names in the same + * group (or globally) as being in use as uncore events + * may be duplicated for each pmu. Set the metric leader + * of such events to be the event that appears in + * metric_events. */ - evlist__for_each_entry_continue(perf_evlist, ev) { - /* - * If events are grouped then the search can terminate - * when then group is left. - */ - if (!has_constraint && - ev->core.leader != metric_events[i]->core.leader && - evsel_same_pmu_or_none(evsel__leader(ev), evsel__leader(metric_events[i]))) - break; - if (!strcmp(metric_events[i]->name, ev->name)) { - set_bit(ev->core.idx, evlist_used); + metric_id = evsel__metric_id(ev); + evlist__for_each_entry_continue(metric_evlist, ev) { + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) ev->metric_leader = metric_events[i]; - } } } - - return metric_events[0]; -} - -static int metricgroup__setup_events(struct list_head *groups, - bool metric_no_merge, - struct evlist *perf_evlist, - struct rblist *metric_events_list) -{ - struct metric_event *me; - struct metric_expr *expr; - int i = 0; - int ret = 0; - struct metric *m; - struct evsel *evsel, *tmp; - unsigned long *evlist_used; - - evlist_used = bitmap_zalloc(perf_evlist->core.nr_entries); - if (!evlist_used) - return -ENOMEM; - - list_for_each_entry (m, groups, nd) { - struct evsel **metric_events; - struct metric_ref *metric_refs = NULL; - - metric_events = calloc(sizeof(void *), - hashmap__size(&m->pctx.ids) + 1); - if (!metric_events) { - ret = -ENOMEM; - break; - } - evsel = find_evsel_group(perf_evlist, &m->pctx, - metric_no_merge, - m->has_constraint, metric_events, - evlist_used); - if (!evsel) { - pr_debug("Cannot resolve %s: %s\n", - m->metric_name, m->metric_expr); - free(metric_events); - continue; - } - for (i = 0; metric_events[i]; i++) - metric_events[i]->collect_stat = true; - me = metricgroup__lookup(metric_events_list, evsel, true); - if (!me) { - ret = -ENOMEM; - free(metric_events); - break; - } - expr = malloc(sizeof(struct metric_expr)); - if (!expr) { - ret = -ENOMEM; - free(metric_events); - break; - } - - /* - * Collect and store collected nested expressions - * for metric processing. - */ - if (m->metric_refs_cnt) { - struct metric_ref_node *ref; - - metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1)); - if (!metric_refs) { - ret = -ENOMEM; - free(metric_events); - free(expr); - break; - } - - i = 0; - list_for_each_entry(ref, &m->metric_refs, list) { - /* - * Intentionally passing just const char pointers, - * originally from 'struct pmu_event' object. - * We don't need to change them, so there's no - * need to create our own copy. - */ - metric_refs[i].metric_name = ref->metric_name; - metric_refs[i].metric_expr = ref->metric_expr; - i++; - } - } - - expr->metric_refs = metric_refs; - expr->metric_expr = m->metric_expr; - expr->metric_name = m->metric_name; - expr->metric_unit = m->metric_unit; - expr->metric_events = metric_events; - expr->runtime = m->runtime; - list_add(&expr->nd, &me->head); - } - - evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { - if (!test_bit(evsel->core.idx, evlist_used)) { - evlist__remove(perf_evlist, evsel); - evsel__delete(evsel); - } - } - bitmap_free(evlist_used); - - return ret; + *out_metric_events = metric_events; + return 0; } static bool match_metric(const char *n, const char *list) @@ -422,7 +343,7 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pe_metric(struct pmu_event *pe, const char *metric) +static bool match_pe_metric(const struct pmu_event *pe, const char *metric) { return match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric); @@ -506,7 +427,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) putchar('\n'); } -static int metricgroup__print_pmu_event(struct pmu_event *pe, +static int metricgroup__print_pmu_event(const struct pmu_event *pe, bool metricgroups, char *filter, bool raw, bool details, struct rblist *groups, @@ -581,14 +502,14 @@ struct metricgroup_print_sys_idata { bool details; }; -typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *); +typedef int (*metricgroup_sys_event_iter_fn)(const struct pmu_event *pe, void *); struct metricgroup_iter_data { metricgroup_sys_event_iter_fn fn; void *data; }; -static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) +static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_iter_data *d = data; struct perf_pmu *pmu = NULL; @@ -607,7 +528,7 @@ static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) return 0; } -static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) +static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_print_sys_idata *d = data; @@ -616,10 +537,10 @@ static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw, bool details) + bool raw, bool details, const char *pmu_name) { - struct pmu_events_map *map = pmu_events_map__find(); - struct pmu_event *pe; + const struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_event *pe; int i; struct rblist groups; struct rb_node *node, *next; @@ -642,6 +563,10 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, break; if (!pe->metric_expr) continue; + if (pmu_name && perf_pmu__is_hybrid(pe->pmu) && + strcmp(pmu_name, pe->pmu)) { + continue; + } if (metricgroup__print_pmu_event(pe, metricgroups, filter, raw, details, &groups, metriclist) < 0) @@ -686,150 +611,391 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } -static void metricgroup__add_metric_weak_group(struct strbuf *events, - struct expr_parse_ctx *ctx) +static const char *code_characters = ",-=@"; + +static int encode_metric_id(struct strbuf *sb, const char *x) { - struct hashmap_entry *cur; - size_t bkt; - bool no_group = true, has_duration = false; + char *c; + int ret = 0; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { - pr_debug("found event %s\n", (const char *)cur->key); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. - */ - if (!strcmp(cur->key, "duration_time")) { - has_duration = true; - continue; + for (; *x; x++) { + c = strchr(code_characters, *x); + if (c) { + ret = strbuf_addch(sb, '!'); + if (ret) + break; + + ret = strbuf_addch(sb, '0' + (c - code_characters)); + if (ret) + break; + } else { + ret = strbuf_addch(sb, *x); + if (ret) + break; } - strbuf_addf(events, "%s%s", - no_group ? "{" : ",", - (const char *)cur->key); - no_group = false; } - if (!no_group) { - strbuf_addf(events, "}:W"); - if (has_duration) - strbuf_addf(events, ",duration_time"); - } else if (has_duration) - strbuf_addf(events, "duration_time"); + return ret; } -static void metricgroup__add_metric_non_group(struct strbuf *events, - struct expr_parse_ctx *ctx) +static int decode_metric_id(struct strbuf *sb, const char *x) { - struct hashmap_entry *cur; - size_t bkt; - bool first = true; + const char *orig = x; + size_t i; + char c; + int ret; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { - if (!first) - strbuf_addf(events, ","); - strbuf_addf(events, "%s", (const char *)cur->key); - first = false; + for (; *x; x++) { + c = *x; + if (*x == '!') { + x++; + i = *x - '0'; + if (i > strlen(code_characters)) { + pr_err("Bad metric-id encoding in: '%s'", orig); + return -1; + } + c = code_characters[i]; + } + ret = strbuf_addch(sb, c); + if (ret) + return ret; } + return 0; } -static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifier) { - static bool violate_nmi_constraint; + struct evsel *ev; + struct strbuf sb = STRBUF_INIT; + char *cur; + int ret = 0; - if (!foot) { - pr_warning("Splitting metric group %s into standalone metrics.\n", name); - violate_nmi_constraint = true; - return; - } + evlist__for_each_entry(perf_evlist, ev) { + if (!ev->metric_id) + continue; - if (!violate_nmi_constraint) - return; + ret = strbuf_setlen(&sb, 0); + if (ret) + break; - pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" - " echo 0 > /proc/sys/kernel/nmi_watchdog\n" - " perf stat ...\n" - " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + ret = decode_metric_id(&sb, ev->metric_id); + if (ret) + break; + + free((char *)ev->metric_id); + ev->metric_id = strdup(sb.buf); + if (!ev->metric_id) { + ret = -ENOMEM; + break; + } + /* + * If the name is just the parsed event, use the metric-id to + * give a more friendly display version. + */ + if (strstr(ev->name, "metric-id=")) { + bool has_slash = false; + + free(ev->name); + for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) { + *cur = '/'; + has_slash = true; + } + + if (modifier) { + if (!has_slash && !strchr(sb.buf, ':')) { + ret = strbuf_addch(&sb, ':'); + if (ret) + break; + } + ret = strbuf_addstr(&sb, modifier); + if (ret) + break; + } + ev->name = strdup(sb.buf); + if (!ev->name) { + ret = -ENOMEM; + break; + } + } + } + strbuf_release(&sb); + return ret; } -static bool metricgroup__has_constraint(struct pmu_event *pe) +static int metricgroup__build_event_string(struct strbuf *events, + const struct expr_parse_ctx *ctx, + const char *modifier, + bool has_constraint) { - if (!pe->metric_constraint) - return false; + struct hashmap_entry *cur; + size_t bkt; + bool no_group = true, has_duration = false; + int ret = 0; - if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && - sysctl__nmi_watchdog_enabled()) { - metricgroup___watchdog_constraint_hint(pe->metric_name, false); - return true; +#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) + + hashmap__for_each_entry(ctx->ids, cur, bkt) { + const char *sep, *rsep, *id = cur->key; + + pr_debug("found event %s\n", id); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(id, "duration_time")) { + has_duration = true; + continue; + } + /* Separate events with commas and open the group if necessary. */ + if (no_group) { + if (!has_constraint) { + ret = strbuf_addch(events, '{'); + RETURN_IF_NON_ZERO(ret); + } + + no_group = false; + } else { + ret = strbuf_addch(events, ','); + RETURN_IF_NON_ZERO(ret); + } + /* + * Encode the ID as an event string. Add a qualifier for + * metric_id that is the original name except with characters + * that parse-events can't parse replaced. For example, + * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/ + */ + sep = strchr(id, '@'); + if (sep != NULL) { + ret = strbuf_add(events, id, sep - id); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addch(events, '/'); + RETURN_IF_NON_ZERO(ret); + rsep = strrchr(sep, '@'); + ret = strbuf_add(events, sep + 1, rsep - sep - 1); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addstr(events, ",metric-id="); + RETURN_IF_NON_ZERO(ret); + sep = rsep; + } else { + sep = strchr(id, ':'); + if (sep != NULL) { + ret = strbuf_add(events, id, sep - id); + RETURN_IF_NON_ZERO(ret); + } else { + ret = strbuf_addstr(events, id); + RETURN_IF_NON_ZERO(ret); + } + ret = strbuf_addstr(events, "/metric-id="); + RETURN_IF_NON_ZERO(ret); + } + ret = encode_metric_id(events, id); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addstr(events, "/"); + RETURN_IF_NON_ZERO(ret); + + if (sep != NULL) { + ret = strbuf_addstr(events, sep + 1); + RETURN_IF_NON_ZERO(ret); + } + if (modifier) { + ret = strbuf_addstr(events, modifier); + RETURN_IF_NON_ZERO(ret); + } } + if (has_duration) { + if (no_group) { + /* Strange case of a metric of just duration_time. */ + ret = strbuf_addf(events, "duration_time"); + } else if (!has_constraint) + ret = strbuf_addf(events, "}:W,duration_time"); + else + ret = strbuf_addf(events, ",duration_time"); + } else if (!no_group && !has_constraint) + ret = strbuf_addf(events, "}:W"); - return false; + return ret; +#undef RETURN_IF_NON_ZERO } -int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) +int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) { return 1; } +/* + * A singly linked list on the stack of the names of metrics being + * processed. Used to identify recursion. + */ +struct visited_metric { + const char *name; + const struct visited_metric *parent; +}; + struct metricgroup_add_iter_data { struct list_head *metric_list; - const char *metric; - struct expr_ids *ids; + const char *metric_name; + const char *modifier; int *ret; bool *has_match; bool metric_no_group; + struct metric *root_metric; + const struct visited_metric *visited; + const struct pmu_events_map *map; }; +static int add_metric(struct list_head *metric_list, + const struct pmu_event *pe, + const char *modifier, + bool metric_no_group, + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map); + +/** + * resolve_metric - Locate metrics within the root metric and recursively add + * references to them. + * @metric_list: The list the metric is added to. + * @modifier: if non-null event modifiers like "u". + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @root_metric: Metrics may reference other metrics to form a tree. In this + * case the root_metric holds all the IDs and a list of referenced + * metrics. When adding a root this argument is NULL. + * @visited: A singly linked list of metric names being added that is used to + * detect recursion. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ +static int resolve_metric(struct list_head *metric_list, + const char *modifier, + bool metric_no_group, + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) +{ + struct hashmap_entry *cur; + size_t bkt; + struct to_resolve { + /* The metric to resolve. */ + const struct pmu_event *pe; + /* + * The key in the IDs map, this may differ from in case, + * etc. from pe->metric_name. + */ + const char *key; + } *pending = NULL; + int i, ret = 0, pending_cnt = 0; + + /* + * Iterate all the parsed IDs and if there's a matching metric and it to + * the pending array. + */ + hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { + const struct pmu_event *pe; + + pe = metricgroup__find_metric(cur->key, map); + if (pe) { + pending = realloc(pending, + (pending_cnt + 1) * sizeof(struct to_resolve)); + if (!pending) + return -ENOMEM; + + pending[pending_cnt].pe = pe; + pending[pending_cnt].key = cur->key; + pending_cnt++; + } + } + + /* Remove the metric IDs from the context. */ + for (i = 0; i < pending_cnt; i++) + expr__del_id(root_metric->pctx, pending[i].key); + + /* + * Recursively add all the metrics, IDs are added to the root metric's + * context. + */ + for (i = 0; i < pending_cnt; i++) { + ret = add_metric(metric_list, pending[i].pe, modifier, metric_no_group, + root_metric, visited, map); + if (ret) + break; + } + + free(pending); + return ret; +} + +/** + * __add_metric - Add a metric to metric_list. + * @metric_list: The list the metric is added to. + * @pe: The pmu_event containing the metric to be added. + * @modifier: if non-null event modifiers like "u". + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @runtime: A special argument for the parser only known at runtime. + * @root_metric: Metrics may reference other metrics to form a tree. In this + * case the root_metric holds all the IDs and a list of referenced + * metrics. When adding a root this argument is NULL. + * @visited: A singly linked list of metric names being added that is used to + * detect recursion. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ static int __add_metric(struct list_head *metric_list, - struct pmu_event *pe, + const struct pmu_event *pe, + const char *modifier, bool metric_no_group, int runtime, - struct metric **mp, - struct expr_id *parent, - struct expr_ids *ids) + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) { - struct metric_ref_node *ref; - struct metric *m; + const struct visited_metric *vm; + int ret; + bool is_root = !root_metric; + struct visited_metric visited_node = { + .name = pe->metric_name, + .parent = visited, + }; - if (*mp == NULL) { + for (vm = visited; vm; vm = vm->parent) { + if (!strcmp(pe->metric_name, vm->name)) { + pr_err("failed: recursion detected for %s\n", pe->metric_name); + return -1; + } + } + + if (is_root) { /* - * We got in here for the parent group, - * allocate it and put it on the list. + * This metric is the root of a tree and may reference other + * metrics that are added recursively. */ - m = zalloc(sizeof(*m)); - if (!m) + root_metric = metric__new(pe, modifier, metric_no_group, runtime); + if (!root_metric) return -ENOMEM; - expr__ctx_init(&m->pctx); - m->metric_name = pe->metric_name; - m->metric_expr = pe->metric_expr; - m->metric_unit = pe->unit; - m->runtime = runtime; - m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); - INIT_LIST_HEAD(&m->metric_refs); - m->metric_refs_cnt = 0; - - parent = expr_ids__alloc(ids); - if (!parent) { - free(m); - return -EINVAL; - } - - parent->id = strdup(pe->metric_name); - if (!parent->id) { - free(m); - return -ENOMEM; - } - *mp = m; } else { + int cnt = 0; + /* - * We got here for the referenced metric, via the - * recursive metricgroup__add_metric call, add - * it to the parent group. + * This metric was referenced in a metric higher in the + * tree. Check if the same metric is already resolved in the + * metric_refs list. */ - m = *mp; + if (root_metric->metric_refs) { + for (; root_metric->metric_refs[cnt].metric_name; cnt++) { + if (!strcmp(pe->metric_name, + root_metric->metric_refs[cnt].metric_name)) + return 0; + } + } - ref = malloc(sizeof(*ref)); - if (!ref) + /* Create reference. Need space for the entry and the terminator. */ + root_metric->metric_refs = realloc(root_metric->metric_refs, + (cnt + 2) * sizeof(struct metric_ref)); + if (!root_metric->metric_refs) return -ENOMEM; /* @@ -838,54 +1004,35 @@ static int __add_metric(struct list_head *metric_list, * need to change them, so there's no need to create * our own copy. */ - ref->metric_name = pe->metric_name; - ref->metric_expr = pe->metric_expr; + root_metric->metric_refs[cnt].metric_name = pe->metric_name; + root_metric->metric_refs[cnt].metric_expr = pe->metric_expr; - list_add(&ref->list, &m->metric_refs); - m->metric_refs_cnt++; + /* Null terminate array. */ + root_metric->metric_refs[cnt+1].metric_name = NULL; + root_metric->metric_refs[cnt+1].metric_expr = NULL; } - /* Force all found IDs in metric to have us as parent ID. */ - WARN_ON_ONCE(!parent); - m->pctx.parent = parent; - /* * For both the parent and referenced metrics, we parse - * all the metric's IDs and add it to the parent context. + * all the metric's IDs and add it to the root context. */ - if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { - if (m->metric_refs_cnt == 0) { - expr__ctx_clear(&m->pctx); - free(m); - *mp = NULL; - } - return -EINVAL; + if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) { + /* Broken metric. */ + ret = -EINVAL; + } else { + /* Resolve referenced metrics. */ + ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric, + &visited_node, map); } - /* - * We add new group only in the 'parent' call, - * so bail out for referenced metric case. - */ - if (m->metric_refs_cnt) - return 0; - - if (list_empty(metric_list)) - list_add(&m->nd, metric_list); - else { - struct list_head *pos; - - /* Place the largest groups at the front. */ - list_for_each_prev(pos, metric_list) { - struct metric *old = list_entry(pos, struct metric, nd); + if (ret) { + if (is_root) + metric__free(root_metric); - if (hashmap__size(&m->pctx.ids) <= - hashmap__size(&old->pctx.ids)) - break; - } - list_add(&m->nd, pos); - } + } else if (is_root) + list_add(&root_metric->nd, metric_list); - return 0; + return ret; } #define map_for_each_event(__pe, __idx, __map) \ @@ -900,10 +1047,10 @@ static int __add_metric(struct list_head *metric_list, (match_metric(__pe->metric_group, __metric) || \ match_metric(__pe->metric_name, __metric))) -struct pmu_event *metricgroup__find_metric(const char *metric, - struct pmu_events_map *map) +const struct pmu_event *metricgroup__find_metric(const char *metric, + const struct pmu_events_map *map) { - struct pmu_event *pe; + const struct pmu_event *pe; int i; map_for_each_event(pe, i, map) { @@ -914,136 +1061,21 @@ struct pmu_event *metricgroup__find_metric(const char *metric, return NULL; } -static int recursion_check(struct metric *m, const char *id, struct expr_id **parent, - struct expr_ids *ids) -{ - struct expr_id_data *data; - struct expr_id *p; - int ret; - - /* - * We get the parent referenced by 'id' argument and - * traverse through all the parent object IDs to check - * if we already processed 'id', if we did, it's recursion - * and we fail. - */ - ret = expr__get_id(&m->pctx, id, &data); - if (ret) - return ret; - - p = expr_id_data__parent(data); - - while (p->parent) { - if (!strcmp(p->id, id)) { - pr_err("failed: recursion detected for %s\n", id); - return -1; - } - p = p->parent; - } - - /* - * If we are over the limit of static entris, the metric - * is too difficult/nested to process, fail as well. - */ - p = expr_ids__alloc(ids); - if (!p) { - pr_err("failed: too many nested metrics\n"); - return -EINVAL; - } - - p->id = strdup(id); - p->parent = expr_id_data__parent(data); - *parent = p; - - return p->id ? 0 : -ENOMEM; -} - static int add_metric(struct list_head *metric_list, - struct pmu_event *pe, + const struct pmu_event *pe, + const char *modifier, bool metric_no_group, - struct metric **mp, - struct expr_id *parent, - struct expr_ids *ids); - -static int __resolve_metric(struct metric *m, - bool metric_no_group, - struct list_head *metric_list, - struct pmu_events_map *map, - struct expr_ids *ids) + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) { - struct hashmap_entry *cur; - size_t bkt; - bool all; - int ret; - - /* - * Iterate all the parsed IDs and if there's metric, - * add it to the context. - */ - do { - all = true; - hashmap__for_each_entry((&m->pctx.ids), cur, bkt) { - struct expr_id *parent; - struct pmu_event *pe; - - pe = metricgroup__find_metric(cur->key, map); - if (!pe) - continue; - - ret = recursion_check(m, cur->key, &parent, ids); - if (ret) - return ret; - - all = false; - /* The metric key itself needs to go out.. */ - expr__del_id(&m->pctx, cur->key); - - /* ... and it gets resolved to the parent context. */ - ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); - if (ret) - return ret; - - /* - * We added new metric to hashmap, so we need - * to break the iteration and start over. - */ - break; - } - } while (!all); - - return 0; -} - -static int resolve_metric(bool metric_no_group, - struct list_head *metric_list, - struct pmu_events_map *map, - struct expr_ids *ids) -{ - struct metric *m; - int err; - - list_for_each_entry(m, metric_list, nd) { - err = __resolve_metric(m, metric_no_group, metric_list, map, ids); - if (err) - return err; - } - return 0; -} - -static int add_metric(struct list_head *metric_list, - struct pmu_event *pe, - bool metric_no_group, - struct metric **m, - struct expr_id *parent, - struct expr_ids *ids) -{ - struct metric *orig = *m; int ret = 0; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids); + ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, + root_metric, visited, map); } else { int j, count; @@ -1054,29 +1086,25 @@ static int add_metric(struct list_head *metric_list, * those events to metric_list. */ - for (j = 0; j < count && !ret; j++, *m = orig) - ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids); + for (j = 0; j < count && !ret; j++) + ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, + root_metric, visited, map); } return ret; } -static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, +static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; - struct metric *m = NULL; int ret; - if (!match_pe_metric(pe, d->metric)) + if (!match_pe_metric(pe, d->metric_name)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); - if (ret) - goto out; - - ret = resolve_metric(d->metric_no_group, - d->metric_list, NULL, d->ids); + ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, + d->root_metric, d->visited, d->map); if (ret) goto out; @@ -1087,32 +1115,47 @@ out: return ret; } -static int metricgroup__add_metric(const char *metric, bool metric_no_group, - struct strbuf *events, +static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, + const struct list_head *r) +{ + const struct metric *left = container_of(l, struct metric, nd); + const struct metric *right = container_of(r, struct metric, nd); + + return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids); +} + +/** + * metricgroup__add_metric - Find and add a metric, or a metric group. + * @metric_name: The name of the metric or metric group. For example, "IPC" + * could be the name of a metric and "TopDownL1" the name of a + * metric group. + * @modifier: if non-null event modifiers like "u". + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @metric_list: The list that the metric or metric group are added to. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ +static int metricgroup__add_metric(const char *metric_name, const char *modifier, + bool metric_no_group, struct list_head *metric_list, - struct pmu_events_map *map) + const struct pmu_events_map *map) { - struct expr_ids ids = { .cnt = 0, }; - struct pmu_event *pe; - struct metric *m; + const struct pmu_event *pe; LIST_HEAD(list); int i, ret; bool has_match = false; - map_for_each_metric(pe, i, map, metric) { + /* + * Iterate over all metrics seeing if metric matches either the name or + * group. When it does add the metric to the list. + */ + map_for_each_metric(pe, i, map, metric_name) { has_match = true; - m = NULL; - - ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); - if (ret) - goto out; - - /* - * Process any possible referenced metrics - * included in the expression. - */ - ret = resolve_metric(metric_no_group, - &list, map, &ids); + ret = add_metric(&list, pe, modifier, metric_no_group, + /*root_metric=*/NULL, + /*visited_metrics=*/NULL, map); if (ret) goto out; } @@ -1122,34 +1165,20 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, .fn = metricgroup__add_metric_sys_event_iter, .data = (void *) &(struct metricgroup_add_iter_data) { .metric_list = &list, - .metric = metric, + .metric_name = metric_name, + .modifier = modifier, .metric_no_group = metric_no_group, - .ids = &ids, .has_match = &has_match, .ret = &ret, + .map = map, }, }; pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); } /* End of pmu events. */ - if (!has_match) { + if (!has_match) ret = -EINVAL; - goto out; - } - - list_for_each_entry(m, &list, nd) { - if (events->len > 0) - strbuf_addf(events, ","); - - if (m->has_constraint) { - metricgroup__add_metric_non_group(events, - &m->pctx); - } else { - metricgroup__add_metric_weak_group(events, - &m->pctx); - } - } out: /* @@ -1157,95 +1186,315 @@ out: * even if it's failed */ list_splice(&list, metric_list); - expr_ids__exit(&ids); return ret; } +/** + * metricgroup__add_metric_list - Find and add metrics, or metric groups, + * specified in a list. + * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1" + * would match the IPC and CPI metrics, and TopDownL1 would match all + * the metrics in the TopDownL1 group. + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @metric_list: The list that metrics are added to. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, - struct strbuf *events, struct list_head *metric_list, - struct pmu_events_map *map) + const struct pmu_events_map *map) { - char *llist, *nlist, *p; - int ret = -EINVAL; + char *list_itr, *list_copy, *metric_name, *modifier; + int ret, count = 0; - nlist = strdup(list); - if (!nlist) + list_copy = strdup(list); + if (!list_copy) return -ENOMEM; - llist = nlist; + list_itr = list_copy; + + while ((metric_name = strsep(&list_itr, ",")) != NULL) { + modifier = strchr(metric_name, ':'); + if (modifier) + *modifier++ = '\0'; - strbuf_init(events, 100); - strbuf_addf(events, "%s", ""); + ret = metricgroup__add_metric(metric_name, modifier, + metric_no_group, metric_list, + map); + if (ret == -EINVAL) + pr_err("Cannot find metric or group `%s'\n", metric_name); - while ((p = strsep(&llist, ",")) != NULL) { - ret = metricgroup__add_metric(p, metric_no_group, events, - metric_list, map); - if (ret == -EINVAL) { - fprintf(stderr, "Cannot find metric or group `%s'\n", - p); + if (ret) break; - } + + count++; } - free(nlist); + free(list_copy); - if (!ret) + if (!ret) { + /* + * Warn about nmi_watchdog if any parsed metrics had the + * NO_NMI_WATCHDOG constraint. + */ metricgroup___watchdog_constraint_hint(NULL, true); - + /* No metrics. */ + if (count == 0) + return -EINVAL; + } return ret; } -static void metric__free_refs(struct metric *metric) +static void metricgroup__free_metrics(struct list_head *metric_list) { - struct metric_ref_node *ref, *tmp; + struct metric *m, *tmp; - list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) { - list_del(&ref->list); - free(ref); + list_for_each_entry_safe (m, tmp, metric_list, nd) { + list_del_init(&m->nd); + metric__free(m); } } -static void metricgroup__free_metrics(struct list_head *metric_list) +/** + * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint + * metric IDs, as the IDs are held in a set, + * duplicates will be removed. + * @metric_list: List to take metrics from. + * @combined: Out argument for result. + */ +static int build_combined_expr_ctx(const struct list_head *metric_list, + struct expr_parse_ctx **combined) { - struct metric *m, *tmp; + struct hashmap_entry *cur; + size_t bkt; + struct metric *m; + char *dup; + int ret; - list_for_each_entry_safe (m, tmp, metric_list, nd) { - metric__free_refs(m); - expr__ctx_clear(&m->pctx); - list_del_init(&m->nd); - free(m); + *combined = expr__ctx_new(); + if (!*combined) + return -ENOMEM; + + list_for_each_entry(m, metric_list, nd) { + if (m->has_constraint && !m->modifier) { + hashmap__for_each_entry(m->pctx->ids, cur, bkt) { + dup = strdup(cur->key); + if (!dup) { + ret = -ENOMEM; + goto err_out; + } + ret = expr__add_id(*combined, dup); + if (ret) + goto err_out; + } + } } + return 0; +err_out: + expr__ctx_free(*combined); + *combined = NULL; + return ret; +} + +/** + * parse_ids - Build the event string for the ids and parse them creating an + * evlist. The encoded metric_ids are decoded. + * @fake_pmu: used when testing metrics not supported by the current CPU. + * @ids: the event identifiers parsed from a metric. + * @modifier: any modifiers added to the events. + * @has_constraint: false if events should be placed in a weak group. + * @out_evlist: the created list of events. + */ +static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, + const char *modifier, bool has_constraint, struct evlist **out_evlist) +{ + struct parse_events_error parse_error; + struct evlist *parsed_evlist; + struct strbuf events = STRBUF_INIT; + int ret; + + *out_evlist = NULL; + if (hashmap__size(ids->ids) == 0) { + char *tmp; + /* + * No ids/events in the expression parsing context. Events may + * have been removed because of constant evaluation, e.g.: + * event1 if #smt_on else 0 + * Add a duration_time event to avoid a parse error on an empty + * string. + */ + tmp = strdup("duration_time"); + if (!tmp) + return -ENOMEM; + + ids__insert(ids->ids, tmp); + } + ret = metricgroup__build_event_string(&events, ids, modifier, + has_constraint); + if (ret) + return ret; + + parsed_evlist = evlist__new(); + if (!parsed_evlist) { + ret = -ENOMEM; + goto err_out; + } + pr_debug("Parsing metric events '%s'\n", events.buf); + parse_events_error__init(&parse_error); + ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu); + if (ret) { + parse_events_error__print(&parse_error, events.buf); + goto err_out; + } + ret = decode_all_metric_ids(parsed_evlist, modifier); + if (ret) + goto err_out; + + *out_evlist = parsed_evlist; + parsed_evlist = NULL; +err_out: + parse_events_error__exit(&parse_error); + evlist__delete(parsed_evlist); + strbuf_release(&events); + return ret; } static int parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, struct perf_pmu *fake_pmu, - struct rblist *metric_events, - struct pmu_events_map *map) + struct rblist *metric_events_list, + const struct pmu_events_map *map) { - struct parse_events_error parse_error; - struct strbuf extra_events; + struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); + struct metric *m; int ret; - if (metric_events->nr_entries == 0) - metricgroup__rblist_init(metric_events); + if (metric_events_list->nr_entries == 0) + metricgroup__rblist_init(metric_events_list); ret = metricgroup__add_metric_list(str, metric_no_group, - &extra_events, &metric_list, map); + &metric_list, map); if (ret) goto out; - pr_debug("adding %s\n", extra_events.buf); - bzero(&parse_error, sizeof(parse_error)); - ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); - if (ret) { - parse_events_print_error(&parse_error, extra_events.buf); - goto out; + + /* Sort metrics from largest to smallest. */ + list_sort(NULL, &metric_list, metric_list_cmp); + + if (!metric_no_merge) { + struct expr_parse_ctx *combined = NULL; + + ret = build_combined_expr_ctx(&metric_list, &combined); + + if (!ret && combined && hashmap__size(combined->ids)) { + ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL, + /*has_constraint=*/true, + &combined_evlist); + } + if (combined) + expr__ctx_free(combined); + + if (ret) + goto out; } - ret = metricgroup__setup_events(&metric_list, metric_no_merge, - perf_evlist, metric_events); + + list_for_each_entry(m, &metric_list, nd) { + struct metric_event *me; + struct evsel **metric_events; + struct evlist *metric_evlist = NULL; + struct metric *n; + struct metric_expr *expr; + + if (combined_evlist && m->has_constraint) { + metric_evlist = combined_evlist; + } else if (!metric_no_merge) { + /* + * See if the IDs for this metric are a subset of an + * earlier metric. + */ + list_for_each_entry(n, &metric_list, nd) { + if (m == n) + break; + + if (n->evlist == NULL) + continue; + + if ((!m->modifier && n->modifier) || + (m->modifier && !n->modifier) || + (m->modifier && n->modifier && + strcmp(m->modifier, n->modifier))) + continue; + + if (expr__subset_of_ids(n->pctx, m->pctx)) { + pr_debug("Events in '%s' fully contained within '%s'\n", + m->metric_name, n->metric_name); + metric_evlist = n->evlist; + break; + } + + } + } + if (!metric_evlist) { + ret = parse_ids(fake_pmu, m->pctx, m->modifier, + m->has_constraint, &m->evlist); + if (ret) + goto out; + + metric_evlist = m->evlist; + } + ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events); + if (ret) { + pr_debug("Cannot resolve IDs for %s: %s\n", + m->metric_name, m->metric_expr); + goto out; + } + + me = metricgroup__lookup(metric_events_list, metric_events[0], true); + + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + free(metric_events); + goto out; + } + + expr->metric_refs = m->metric_refs; + m->metric_refs = NULL; + expr->metric_expr = m->metric_expr; + if (m->modifier) { + char *tmp; + + if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0) + expr->metric_name = NULL; + else + expr->metric_name = tmp; + } else + expr->metric_name = strdup(m->metric_name); + + if (!expr->metric_name) { + ret = -ENOMEM; + free(metric_events); + goto out; + } + expr->metric_unit = m->metric_unit; + expr->metric_events = metric_events; + expr->runtime = m->pctx->runtime; + list_add(&expr->nd, &me->head); + } + + + if (combined_evlist) { + evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries); + evlist__delete(combined_evlist); + } + + list_for_each_entry(m, &metric_list, nd) { + if (m->evlist) + evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries); + } + out: metricgroup__free_metrics(&metric_list); - strbuf_release(&extra_events); return ret; } @@ -1256,14 +1505,14 @@ int metricgroup__parse_groups(const struct option *opt, struct rblist *metric_events) { struct evlist *perf_evlist = *(struct evlist **)opt->value; - struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_events_map *map = pmu_events_map__find(); return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); } int metricgroup__parse_groups_test(struct evlist *evlist, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1275,8 +1524,8 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metricgroup__has_metric(const char *metric) { - struct pmu_events_map *map = pmu_events_map__find(); - struct pmu_event *pe; + const struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_event *pe; int i; if (!map) @@ -1328,7 +1577,10 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, return -ENOMEM; new_expr->metric_expr = old_expr->metric_expr; - new_expr->metric_name = old_expr->metric_name; + new_expr->metric_name = strdup(old_expr->metric_name); + if (!new_expr->metric_name) + return -ENOMEM; + new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index cc4a92492a61..2b42b778d1bf 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -14,24 +14,51 @@ struct rblist; struct pmu_events_map; struct cgroup; +/** + * A node in a rblist keyed by the evsel. The global rblist of metric events + * generally exists in perf_stat_config. The evsel is looked up in the rblist + * yielding a list of metric_expr. + */ struct metric_event { struct rb_node nd; struct evsel *evsel; struct list_head head; /* list of metric_expr */ }; +/** + * A metric referenced by a metric_expr. When parsing a metric expression IDs + * will be looked up, matching either a value (from metric_events) or a + * metric_ref. A metric_ref will then be parsed recursively. The metric_refs and + * metric_events need to be known before parsing so that their values may be + * placed in the parse context for lookup. + */ struct metric_ref { const char *metric_name; const char *metric_expr; }; +/** + * One in a list of metric_expr associated with an evsel. The data is used to + * generate a metric value during stat output. + */ struct metric_expr { struct list_head nd; + /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; + /** The name of the meric such as "IPC". */ const char *metric_name; + /** + * The "ScaleUnit" that scales and adds a unit to the metric during + * output. For example, "6.4e-05MiB" means to scale the resulting metric + * by 6.4e-05 (typically converting a unit like cache lines to something + * more human intelligible) and then add "MiB" afterward when displayed. + */ const char *metric_unit; + /** Null terminated array of events used by the metric. */ struct evsel **metric_events; + /** Null terminated array of referenced metrics. */ struct metric_ref *metric_refs; + /** A value substituted for '?' during parsing. */ int runtime; }; @@ -43,19 +70,19 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); -struct pmu_event *metricgroup__find_metric(const char *metric, - struct pmu_events_map *map); +const struct pmu_event *metricgroup__find_metric(const char *metric, + const struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); void metricgroup__print(bool metrics, bool groups, char *filter, - bool raw, bool details); + bool raw, bool details, const char *pmu_name); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); +int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 512dc8b9c168..23ecdba9e670 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -350,3 +350,14 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } + +int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) +{ + clone->nbits = original->nbits; + clone->bits = bitmap_zalloc(original->nbits); + if (!clone->bits) + return -ENOMEM; + + memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); + return 0; +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index af33118354dd..8e259b9610f8 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -64,4 +64,7 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); +int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, + struct mmap_cpu_mask *clone); + #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index b234d95fb10a..9fc86971027b 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -38,7 +38,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr, static int create_event_hybrid(__u32 config_type, int *idx, struct list_head *list, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms, struct perf_pmu *pmu) { @@ -47,7 +48,7 @@ static int create_event_hybrid(__u32 config_type, int *idx, __u64 config = attr->config; config_hybrid_attr(attr, config_type, pmu->type); - evsel = parse_events__add_event_hybrid(list, idx, attr, name, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -70,7 +71,8 @@ static int pmu_cmp(struct parse_events_state *parse_state, static int add_hw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms) + const char *name, const char *metric_id, + struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -84,7 +86,7 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, - &terms, pmu); + metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -94,14 +96,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, } static int create_raw_event_hybrid(int *idx, struct list_head *list, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, + const char *metric_id, struct list_head *config_terms, struct perf_pmu *pmu) { struct evsel *evsel; attr->type = pmu->type; - evsel = parse_events__add_event_hybrid(list, idx, attr, name, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -113,7 +117,8 @@ static int create_raw_event_hybrid(int *idx, struct list_head *list, static int add_raw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms) + const char *name, const char *metric_id, + struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -126,7 +131,7 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, &terms, pmu); + name, metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -138,7 +143,8 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms, + const char *name, const char *metric_id, + struct list_head *config_terms, bool *hybrid) { *hybrid = false; @@ -150,16 +156,18 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, *hybrid = true; if (attr->type != PERF_TYPE_RAW) { - return add_hw_hybrid(parse_state, list, attr, name, + return add_hw_hybrid(parse_state, list, attr, name, metric_id, config_terms); } - return add_raw_hybrid(parse_state, list, attr, name, + return add_raw_hybrid(parse_state, list, attr, name, metric_id, config_terms); } int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, + const char *metric_id, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state) @@ -180,7 +188,7 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, &terms, pmu); + attr, name, metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index f33bd67aa851..cbc05fec02a2 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ b/tools/perf/util/parse-events-hybrid.h @@ -11,11 +11,13 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms, + const char *name, const char *metric_id, + struct list_head *config_terms, bool *hybrid); int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, const char *metric_id, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 51a2219df601..5bfb6f892489 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -19,8 +19,6 @@ #include <subcmd/exec-cmd.h> #include "string2.h" #include "strlist.h" -#include "symbol.h" -#include "header.h" #include "bpf-loader.h" #include "debug.h" #include <api/fs/tracing_path.h> @@ -193,39 +191,6 @@ static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir) #define MAX_EVENT_LENGTH 512 -void parse_events__handle_error(struct parse_events_error *err, int idx, - char *str, char *help) -{ - if (WARN(!str, "WARNING: failed to provide error string\n")) { - free(help); - return; - } - switch (err->num_errors) { - case 0: - err->idx = idx; - err->str = str; - err->help = help; - break; - case 1: - err->first_idx = err->idx; - err->idx = idx; - err->first_str = err->str; - err->str = str; - err->first_help = err->help; - err->help = help; - break; - default: - pr_debug("Multiple errors dropping message: %s (%s)\n", - err->str, err->help); - free(err->str); - err->str = str; - free(err->help); - err->help = help; - break; - } - err->num_errors++; -} - struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; @@ -334,12 +299,7 @@ const char *event_type(int type) return "unknown"; } -static int parse_events__is_name_term(struct parse_events_term *term) -{ - return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; -} - -static char *get_config_name(struct list_head *head_terms) +static char *get_config_str(struct list_head *head_terms, int type_term) { struct parse_events_term *term; @@ -347,17 +307,27 @@ static char *get_config_name(struct list_head *head_terms) return NULL; list_for_each_entry(term, head_terms, list) - if (parse_events__is_name_term(term)) + if (term->type_term == type_term) return term->val.str; return NULL; } +static char *get_config_metric_id(struct list_head *head_terms) +{ + return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID); +} + +static char *get_config_name(struct list_head *head_terms) +{ + return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); +} + static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, - char *name, struct perf_pmu *pmu, + const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, const char *cpu_list) { @@ -386,6 +356,9 @@ __add_event(struct list_head *list, int *idx, if (name) evsel->name = strdup(name); + if (metric_id) + evsel->metric_id = strdup(metric_id); + if (config_terms) list_splice_init(config_terms, &evsel->config_terms); @@ -396,18 +369,21 @@ __add_event(struct list_head *list, int *idx, } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu) + const char *name, const char *metric_id, + struct perf_pmu *pmu) { - return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, - NULL); + return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, + metric_id, pmu, /*config_terms=*/NULL, + /*auto_merge_stats=*/false, /*cpu_list=*/NULL); } static int add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, - struct list_head *config_terms) + struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms) { - return __add_event(list, idx, attr, true, name, NULL, config_terms, - false, NULL) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, + /*pmu=*/NULL, config_terms, + /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; } static int add_event_tool(struct list_head *list, int *idx, @@ -419,8 +395,10 @@ static int add_event_tool(struct list_head *list, int *idx, .config = PERF_COUNT_SW_DUMMY, }; - evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false, - "0"); + evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, + /*metric_id=*/NULL, /*pmu=*/NULL, + /*config_terms=*/NULL, /*auto_merge_stats=*/false, + /*cpu_list=*/"0"); if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; @@ -466,7 +444,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, { struct perf_event_attr attr; LIST_HEAD(config_terms); - char name[MAX_NAME_LEN], *config_name; + char name[MAX_NAME_LEN]; + const char *config_name, *metric_id; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n, ret; @@ -531,13 +510,17 @@ int parse_events_add_cache(struct list_head *list, int *idx, return -ENOMEM; } + metric_id = get_config_metric_id(head_config); ret = parse_events__add_cache_hybrid(list, idx, &attr, - config_name ? : name, &config_terms, + config_name ? : name, + metric_id, + &config_terms, &hybrid, parse_state); if (hybrid) goto out_free_terms; - ret = add_event(list, idx, &attr, config_name ? : name, &config_terms); + ret = add_event(list, idx, &attr, config_name ? : name, metric_id, + &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -571,7 +554,7 @@ static void tracepoint_error(struct parse_events_error *e, int err, } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - parse_events__handle_error(e, 0, strdup(str), strdup(help)); + parse_events_error__handle(e, 0, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx, @@ -795,7 +778,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -815,7 +798,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, int err; if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - parse_events__handle_error(parse_state->error, term->err_term, + parse_events_error__handle(parse_state->error, term->err_term, strdup("Invalid config term for BPF object"), NULL); return -EINVAL; @@ -835,7 +818,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, else idx = term->err_term + error_pos; - parse_events__handle_error(parse_state->error, idx, + parse_events_error__handle(parse_state->error, idx, strdup(errbuf), strdup( "Hint:\tValid config terms:\n" @@ -907,7 +890,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -931,7 +914,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct bpf_object *obj __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -943,7 +926,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, bool source __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -1014,7 +997,8 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; - return add_event(list, idx, &attr, NULL, NULL); + return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL, + /*config_terms=*/NULL); } static int check_type_val(struct parse_events_term *term, @@ -1025,7 +1009,7 @@ static int check_type_val(struct parse_events_term *term, return 0; if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, type == PARSE_EVENTS__TERM_TYPE_NUM ? strdup("expected numeric value") : strdup("expected string value"), @@ -1059,6 +1043,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", + [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", }; static bool config_term_shrinked; @@ -1069,7 +1054,7 @@ config_term_avail(int term_type, struct parse_events_error *err) char *err_str; if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { - parse_events__handle_error(err, -1, + parse_events_error__handle(err, -1, strdup("Invalid term_type"), NULL); return false; } @@ -1081,6 +1066,7 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: return true; @@ -1091,7 +1077,7 @@ config_term_avail(int term_type, struct parse_events_error *err) /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", config_term_names[term_type]) >= 0) - parse_events__handle_error(err, -1, err_str, NULL); + parse_events_error__handle(err, -1, err_str, NULL); return false; } } @@ -1135,7 +1121,7 @@ do { \ if (strcmp(term->val.str, "no") && parse_branch_str(term->val.str, &attr->branch_sample_type)) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("invalid branch sample type"), NULL); return -EINVAL; @@ -1144,7 +1130,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: CHECK_TYPE_VAL(NUM); if (term->val.num > 1) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1171,6 +1157,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; @@ -1180,7 +1169,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_PERCORE: CHECK_TYPE_VAL(NUM); if ((unsigned int)term->val.num > 1) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1192,14 +1181,14 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: CHECK_TYPE_VAL(NUM); if (term->val.num > UINT_MAX) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("too big"), NULL); return -EINVAL; } break; default: - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, strdup("unknown term"), parse_events_formats_error_string(NULL)); return -EINVAL; @@ -1253,7 +1242,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, return config_term_common(attr, term, err); default: if (err) { - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, strdup("unknown term"), strdup("valid terms: call-graph,stack-size\n")); } @@ -1440,6 +1429,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, { struct perf_event_attr attr; LIST_HEAD(config_terms); + const char *name, *metric_id; bool hybrid; int ret; @@ -1456,14 +1446,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, return -ENOMEM; } + name = get_config_name(head_config); + metric_id = get_config_metric_id(head_config); ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, - get_config_name(head_config), + name, metric_id, &config_terms, &hybrid); if (hybrid) goto out_free_terms; - ret = add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), &config_terms); + ret = add_event(list, &parse_state->idx, &attr, name, metric_id, + &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -1471,7 +1463,7 @@ out_free_terms: int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - enum perf_tool_event tool_event) + int tool_event) { return add_event_tool(list, &parse_state->idx, tool_event); } @@ -1549,7 +1541,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (asprintf(&err_str, "Cannot find PMU `%s'. Missing kernel support?", name) >= 0) - parse_events__handle_error(err, 0, err_str, NULL); + parse_events_error__handle(err, 0, err_str, NULL); return -EINVAL; } @@ -1564,8 +1556,11 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, true, NULL, - pmu, NULL, auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, + /*init_attr=*/true, /*name=*/NULL, + /*metric_id=*/NULL, pmu, + /*config_terms=*/NULL, auto_merge_stats, + /*cpu_list=*/NULL); if (evsel) { evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; @@ -1618,9 +1613,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } - evsel = __add_event(list, &parse_state->idx, &attr, true, - get_config_name(head_config), pmu, - &config_terms, auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, + get_config_name(head_config), + get_config_metric_id(head_config), pmu, + &config_terms, auto_merge_stats, /*cpu_list=*/NULL); if (!evsel) return -ENOMEM; @@ -1644,44 +1640,50 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - char *str, struct list_head **listp) + char *str, struct list_head *head, + struct list_head **listp) { struct parse_events_term *term; - struct list_head *list; + struct list_head *list = NULL; struct perf_pmu *pmu = NULL; int ok = 0; + char *config; *listp = NULL; + + if (!head) { + head = malloc(sizeof(struct list_head)); + if (!head) + goto out_err; + + INIT_LIST_HEAD(head); + } + config = strdup(str); + if (!config) + goto out_err; + + if (parse_events_term__num(&term, + PARSE_EVENTS__TERM_TYPE_USER, + config, 1, false, &config, + NULL) < 0) { + free(config); + goto out_err; + } + list_add_tail(&term->list, head); + + /* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); if (!list) - return -1; + goto out_err; + INIT_LIST_HEAD(list); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { struct perf_pmu_alias *alias; list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { - struct list_head *head; - char *config; - - head = malloc(sizeof(struct list_head)); - if (!head) - return -1; - INIT_LIST_HEAD(head); - config = strdup(str); - if (!config) - return -1; - if (parse_events_term__num(&term, - PARSE_EVENTS__TERM_TYPE_USER, - config, 1, false, &config, - NULL) < 0) { - free(list); - free(config); - return -1; - } - list_add_tail(&term->list, head); - if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { @@ -1689,17 +1691,17 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, pmu->name, alias->str); ok++; } - - parse_events_terms__delete(head); } } } - if (!ok) { +out_err: + if (ok) + *listp = list; + else free(list); - return -1; - } - *listp = list; - return 0; + + parse_events_terms__delete(head); + return ok ? 0 : -1; } int parse_events__modifier_group(struct list_head *list, @@ -2029,7 +2031,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) return 0; } -int parse_events_name(struct list_head *list, char *name) +int parse_events_name(struct list_head *list, const char *name) { struct evsel *evsel; @@ -2299,6 +2301,52 @@ int __parse_events(struct evlist *evlist, const char *str, return ret; } +void parse_events_error__init(struct parse_events_error *err) +{ + bzero(err, sizeof(*err)); +} + +void parse_events_error__exit(struct parse_events_error *err) +{ + zfree(&err->str); + zfree(&err->help); + zfree(&err->first_str); + zfree(&err->first_help); +} + +void parse_events_error__handle(struct parse_events_error *err, int idx, + char *str, char *help) +{ + if (WARN(!str, "WARNING: failed to provide error string\n")) { + free(help); + return; + } + switch (err->num_errors) { + case 0: + err->idx = idx; + err->str = str; + err->help = help; + break; + case 1: + err->first_idx = err->idx; + err->idx = idx; + err->first_str = err->str; + err->str = str; + err->first_help = err->help; + err->help = help; + break; + default: + pr_debug("Multiple errors dropping message: %s (%s)\n", + err->str, err->help); + free(err->str); + err->str = str; + free(err->help); + err->help = help; + break; + } + err->num_errors++; +} + #define MAX_WIDTH 1000 static int get_term_width(void) { @@ -2308,8 +2356,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; } -static void __parse_events_print_error(int err_idx, const char *err_str, - const char *err_help, const char *event) +static void __parse_events_error__print(int err_idx, const char *err_str, + const char *err_help, const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -2363,22 +2411,18 @@ static void __parse_events_print_error(int err_idx, const char *err_str, } } -void parse_events_print_error(struct parse_events_error *err, - const char *event) +void parse_events_error__print(struct parse_events_error *err, + const char *event) { if (!err->num_errors) return; - __parse_events_print_error(err->idx, err->str, err->help, event); - zfree(&err->str); - zfree(&err->help); + __parse_events_error__print(err->idx, err->str, err->help, event); if (err->num_errors > 1) { fputs("\nInitial error:\n", stderr); - __parse_events_print_error(err->first_idx, err->first_str, + __parse_events_error__print(err->first_idx, err->first_str, err->first_help, event); - zfree(&err->first_str); - zfree(&err->first_help); } } @@ -2391,13 +2435,14 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, str, &err); if (ret) { - parse_events_print_error(&err, str); + parse_events_error__print(&err, str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } + parse_events_error__exit(&err); return ret; } @@ -2703,7 +2748,7 @@ next: return 0; } -static bool is_event_supported(u8 type, unsigned config) +static bool is_event_supported(u8 type, u64 config) { bool ret = true; int open_return; @@ -2823,10 +2868,18 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0; - char name[64]; - char **evt_list = NULL; + unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; + char name[64], new_name[128]; + char **evt_list = NULL, **evt_pmus = NULL; bool evt_num_known = false; + struct perf_pmu *pmu = NULL; + + if (perf_pmu__has_hybrid()) { + npmus = perf_pmu__hybrid_pmu_num(); + evt_pmus = zalloc(sizeof(char *) * npmus); + if (!evt_pmus) + goto out_enomem; + } restart: if (evt_num_known) { @@ -2842,20 +2895,61 @@ restart: continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + unsigned int hybrid_supported = 0, j; + bool supported; + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) - continue; + if (!perf_pmu__has_hybrid()) { + if (!is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) { + continue; + } + } else { + perf_pmu__for_each_hybrid_pmu(pmu) { + if (!evt_num_known) { + evt_num++; + continue; + } + + supported = is_event_supported( + PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16) | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); + if (supported) { + snprintf(new_name, sizeof(new_name), "%s/%s/", + pmu->name, name); + evt_pmus[hybrid_supported] = strdup(new_name); + hybrid_supported++; + } + } + + if (hybrid_supported == 0) + continue; + } if (!evt_num_known) { evt_num++; continue; } - evt_list[evt_i] = strdup(name); + if ((hybrid_supported == 0) || + (hybrid_supported == npmus)) { + evt_list[evt_i] = strdup(name); + if (npmus > 0) { + for (j = 0; j < npmus; j++) + zfree(&evt_pmus[j]); + } + } else { + for (j = 0; j < hybrid_supported; j++) { + evt_list[evt_i++] = evt_pmus[j]; + evt_pmus[j] = NULL; + } + continue; + } + if (evt_list[evt_i] == NULL) goto out_enomem; evt_i++; @@ -2867,6 +2961,13 @@ restart: evt_num_known = true; goto restart; } + + for (evt_i = 0; evt_i < evt_num; evt_i++) { + if (!evt_list[evt_i]) + break; + } + + evt_num = evt_i; qsort(evt_list, evt_num, sizeof(char *), cmp_string); evt_i = 0; while (evt_i < evt_num) { @@ -2885,6 +2986,10 @@ out_free: for (evt_i = 0; evt_i < evt_num; evt_i++) zfree(&evt_list[evt_i]); zfree(&evt_list); + + for (evt_i = 0; evt_i < npmus; evt_i++) + zfree(&evt_pmus[evt_i]); + zfree(&evt_pmus); return evt_num; out_enomem: @@ -2994,7 +3099,8 @@ out_enomem: * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated) + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -3006,7 +3112,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag, deprecated); + details_flag, deprecated, pmu_name); if (event_glob != NULL) return; @@ -3032,7 +3138,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only, details_flag); + metricgroup__print(true, true, NULL, name_only, details_flag, + pmu_name); print_libpfm_events(name_only, long_desc); } @@ -3083,7 +3190,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config, + .config = config ? : strdup(config_term_names[type_term]), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? loc_val->first_column : 0, @@ -3227,7 +3334,7 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, if (!parse_state->error) return; - parse_events__handle_error(parse_state->error, idx, strdup(str), NULL); + parse_events_error__handle(parse_state->error, idx, strdup(str), NULL); } static void config_terms_list(char *buf, size_t buf_sz) @@ -3286,9 +3393,12 @@ fail: struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu, + const char *name, + const char *metric_id, + struct perf_pmu *pmu, struct list_head *config_terms) { - return __add_event(list, idx, attr, true, name, pmu, - config_terms, false, NULL); + return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id, + pmu, config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index bf6e41aa9b6a..c7fc93f54577 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -87,6 +87,7 @@ enum { PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, + PARSE_EVENTS__TERM_TYPE_METRIC_ID, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -141,8 +142,6 @@ struct parse_events_state { char *hybrid_pmu_name; }; -void parse_events__handle_error(struct parse_events_error *err, int idx, - char *str, char *help); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, @@ -162,7 +161,7 @@ void parse_events_terms__purge(struct list_head *terms); void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); -int parse_events_name(struct list_head *list, char *name); +int parse_events_name(struct list_head *list, const char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, @@ -182,10 +181,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); -enum perf_tool_event; int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - enum perf_tool_event tool_event); + int tool_event); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, @@ -200,10 +198,12 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_alias); struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu); + const char *name, const char *metric_id, + struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, + struct list_head *head_config, struct list_head **listp); int parse_events_copy_term_list(struct list_head *old, @@ -219,7 +219,8 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag, bool deprecated); + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name); struct event_symbol { const char *symbol; @@ -241,8 +242,12 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); -void parse_events_print_error(struct parse_events_error *err, - const char *event); +void parse_events_error__init(struct parse_events_error *err); +void parse_events_error__exit(struct parse_events_error *err); +void parse_events_error__handle(struct parse_events_error *err, int idx, + char *str, char *help); +void parse_events_error__print(struct parse_events_error *err, + const char *event); #ifdef HAVE_LIBELF_SUPPORT /* @@ -267,7 +272,9 @@ int perf_pmu__test_parse_init(void); struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu, + const char *name, + const char *metric_id, + struct perf_pmu *pmu, struct list_head *config_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 923849024b15..4efe9872c667 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -12,7 +12,6 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" #include "evsel.h" @@ -139,18 +138,23 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat yylval->str = strdup(text); - if (parse_state->fake_pmu) - return PE_PMU_EVENT_FAKE; - + /* + * If we're not testing then parse check determines the PMU event type + * which if it isn't a PMU returns PE_NAME. When testing the result of + * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless + * an '!' is present in which case the text can't be a PMU name. + */ switch (perf_pmu__parse_check(text)) { case PMU_EVENT_SYMBOL_PREFIX: return PE_PMU_EVENT_PRE; case PMU_EVENT_SYMBOL_SUFFIX: return PE_PMU_EVENT_SUF; case PMU_EVENT_SYMBOL: - return PE_KERNEL_PMU_EVENT; + return parse_state->fake_pmu + ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; default: - return PE_NAME; + return parse_state->fake_pmu && !strchr(text,'!') + ? PE_PMU_EVENT_FAKE : PE_NAME; } } @@ -205,7 +209,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]* name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? @@ -295,6 +299,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } +metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } r{num_raw_hex} { return raw(yyscanner); } r0x{num_raw_hex} { return raw(yyscanner); } , { return ','; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d94e48e1ff9b..174158982fae 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -183,6 +183,11 @@ group_def ':' PE_MODIFIER_EVENT err = parse_events__modifier_group(list, $3); free($3); if (err) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + + parse_events_error__handle(error, @3.first_column, + strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -240,6 +245,11 @@ event_name PE_MODIFIER_EVENT err = parse_events__modifier_event(list, $2, false); free($2); if (err) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + + parse_events_error__handle(error, @2.first_column, + strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -342,7 +352,20 @@ PE_KERNEL_PMU_EVENT sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); + free($1); + if (err < 0) + YYABORT; + $$ = list; +} +| +PE_KERNEL_PMU_EVENT opt_pmu_config +{ + struct list_head *list; + int err; + + /* frees $2 */ + err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); free($1); if (err < 0) YYABORT; @@ -357,7 +380,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); free($1); free($3); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0) + if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) YYABORT; $$ = list; } diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 756295dedccc..f0bcfcab1a93 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -87,7 +87,8 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, pmu = perf_pmu__find_by_type((unsigned int)attr.type); evsel = parse_events__add_event(evlist->core.nr_entries, - &attr, q, pmu); + &attr, q, /*metric_id=*/NULL, + pmu); if (evsel == NULL) goto error; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bdabd62170d2..6ae58406f4fc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -315,7 +315,7 @@ static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, - char *desc, char *val, struct pmu_event *pe) + char *desc, char *val, const struct pmu_event *pe) { struct parse_events_term *term; struct perf_pmu_alias *alias; @@ -710,9 +710,9 @@ static char *perf_pmu__getcpuid(struct perf_pmu *pmu) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) +const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { - struct pmu_events_map *map; + const struct pmu_events_map *map; char *cpuid = perf_pmu__getcpuid(pmu); int i; @@ -737,7 +737,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -struct pmu_events_map *__weak pmu_events_map__find(void) +const struct pmu_events_map *__weak pmu_events_map__find(void) { return perf_pmu__find_map(NULL); } @@ -824,7 +824,7 @@ out: * as aliases. */ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - struct pmu_events_map *map) + const struct pmu_events_map *map) { int i; const char *name = pmu->name; @@ -834,7 +834,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, i = 0; while (1) { const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu"; - struct pmu_event *pe = &map->table[i++]; + const struct pmu_event *pe = &map->table[i++]; const char *pname = pe->pmu ? pe->pmu : cpu_name; if (!pe->name) { @@ -859,7 +859,7 @@ new_alias: static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { - struct pmu_events_map *map; + const struct pmu_events_map *map; map = perf_pmu__find_map(pmu); if (!map) @@ -873,7 +873,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) int i = 0; while (1) { - struct pmu_sys_events *event_table; + const struct pmu_sys_events *event_table; int j = 0; event_table = &pmu_sys_event_tables[i++]; @@ -882,7 +882,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) break; while (1) { - struct pmu_event *pe = &event_table->table[j++]; + const struct pmu_event *pe = &event_table->table[j++]; int ret; if (!pe->name && !pe->metric_group && !pe->metric_name) @@ -900,7 +900,7 @@ struct pmu_sys_event_iter_data { struct perf_pmu *pmu; }; -static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) +static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, void *data) { struct pmu_sys_event_iter_data *idata = data; struct perf_pmu *pmu = idata->pmu; @@ -1283,7 +1283,7 @@ static int pmu_config_term(const char *pmu_name, unknown_term = NULL; help_msg = parse_events_formats_error_string(pmu_term); if (err) { - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, unknown_term, help_msg); } else { @@ -1316,7 +1316,7 @@ static int pmu_config_term(const char *pmu_name, if (term->no_value && bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) { if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("no value assigned for term"), NULL); } @@ -1331,7 +1331,7 @@ static int pmu_config_term(const char *pmu_name, term->config, term->val.str); } if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected numeric value"), NULL); } @@ -1348,7 +1348,7 @@ static int pmu_config_term(const char *pmu_name, if (err) { char *err_str; - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, asprintf(&err_str, "value too big for format, maximum is %llu", (unsigned long long)max_val) < 0 @@ -1608,6 +1608,7 @@ static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; + int ret; /* Put extra events last */ if (!!as->desc != !!bs->desc) @@ -1623,7 +1624,13 @@ static int cmp_sevent(const void *a, const void *b) if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - return strcmp(as->name, bs->name); + ret = strcmp(as->name, bs->name); + if (!ret) { + if (as->pmu && bs->pmu) + return strcmp(as->pmu, bs->pmu); + } + + return ret; } static void wordwrap(char *s, int start, int max, int corr) @@ -1653,7 +1660,8 @@ bool is_pmu_core(const char *name) } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated) + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1679,10 +1687,16 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu_name && perf_pmu__is_hybrid(pmu->name) && + strcmp(pmu_name, pmu->name)) { + continue; + } + list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name); + bool is_cpu = is_pmu_core(pmu->name) || + perf_pmu__is_hybrid(pmu->name); if (alias->deprecated && !deprecated) continue; @@ -1730,8 +1744,13 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) - continue; + if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) { + if (!aliases[j].pmu || !aliases[j - 1].pmu || + !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) { + continue; + } + } + if (name_only) { printf("%s ", aliases[j].name); continue; @@ -1906,7 +1925,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) } void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - char *name) + const char *name) { struct perf_pmu_format *format; __u64 masks = 0, bits; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 394898b07fd9..541889fa9f9c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -49,6 +49,10 @@ struct perf_pmu { struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; + + struct { + bool exclude_guest; + } missing_features; }; extern struct perf_pmu perf_pmu__fake; @@ -111,7 +115,7 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, - bool deprecated); + bool deprecated, const char *pmu_name); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); @@ -120,21 +124,21 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - struct pmu_events_map *map); + const struct pmu_events_map *map); -struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -struct pmu_events_map *pmu_events_map__find(void); +const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +const struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); -typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data); +typedef int (*pmu_sys_event_iter_fn)(const struct pmu_event *pe, void *data); void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - char *name); + const char *name); bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index d7c976671e3a..a685d20165f7 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,6 +18,7 @@ util/mmap.c util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c +../lib/list_sort.c ../lib/hweight.c ../lib/string.c ../lib/vsprintf.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8feef3a05af7..563a9ba8954f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -70,6 +70,18 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } /* + * This one is needed not to drag the PMU bandwagon, jevents generated + * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for + * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so + * far, for the perf python binding known usecases, revisit if this become + * necessary. + */ +struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused) +{ + return NULL; +} + +/* * Add this one here not to drag util/metricgroup.c */ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 68f471d9a88b..ef6c2715fdd9 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -77,6 +77,7 @@ struct record_opts { int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; + int synth; }; extern const char * const *record_usage; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 8130b56aa04b..f3fdad28a852 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -244,7 +244,7 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos, struct hws_basic_entry *basicp) { struct hws_basic_entry *basic = basicp; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_basic_entry local; unsigned long long word = be64toh(*(unsigned long long *)basicp); @@ -288,7 +288,7 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos, struct hws_diag_entry *diagp) { struct hws_diag_entry *diag = diagp; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_diag_entry local; unsigned long long word = be64toh(*(unsigned long long *)diagp); @@ -322,7 +322,7 @@ static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, static bool s390_cpumsf_trailer_show(const char *color, size_t pos, struct hws_trailer_entry *te) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_trailer_entry local; const unsigned long long flags = be64toh(te->flags); @@ -552,7 +552,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf) te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; progusage2 = be64toh(te->progusage[1]); #else diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 08ec3c3ae0ee..cd3a34840389 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -135,12 +135,12 @@ static int get_counterset_start(int setnr) * the name of this counter. * If no match is found a NULL pointer is returned. */ -static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) +static const char *get_counter_name(int set, int nr, const struct pmu_events_map *map) { int rc, event_nr, wanted = get_counterset_start(set) + nr; if (map) { - struct pmu_event *evp = map->table; + const struct pmu_event *evp = map->table; for (; evp->name || evp->event || evp->desc; ++evp) { if (evp->name == NULL || evp->event == NULL) @@ -159,7 +159,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) unsigned char *buf = sample->raw_data; const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; - struct pmu_events_map *map; + const struct pmu_events_map *map; u64 *p; map = pmu_events_map__find(); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 069c2cfdd3be..d8857d1b6d7c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -44,7 +44,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, size_t decomp_size, src_size; u64 decomp_last_rem = 0; size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; - struct decomp *decomp, *decomp_last = session->decomp_last; + struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; if (decomp_last) { decomp_last_rem = decomp_last->size - decomp_last->head; @@ -71,7 +71,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, src = (void *)event + sizeof(struct perf_record_compressed); src_size = event->pack.header.size - sizeof(struct perf_record_compressed); - decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size, + decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); if (!decomp_size) { munmap(decomp, mmap_len); @@ -81,13 +81,12 @@ static int perf_session__process_compressed_event(struct perf_session *session, decomp->size += decomp_size; - if (session->decomp == NULL) { - session->decomp = decomp; - session->decomp_last = decomp; - } else { - session->decomp_last->next = decomp; - session->decomp_last = decomp; - } + if (session->active_decomp->decomp == NULL) + session->active_decomp->decomp = decomp; + else + session->active_decomp->decomp_last->next = decomp; + + session->active_decomp->decomp_last = decomp; pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); @@ -197,6 +196,8 @@ struct perf_session *__perf_session__new(struct perf_data *data, session->repipe = repipe; session->tool = tool; + session->decomp_data.zstd_decomp = &session->zstd_data; + session->active_decomp = &session->decomp_data; INIT_LIST_HEAD(&session->auxtrace_index); machines__init(&session->machines); ordered_events__init(&session->ordered_events, @@ -276,11 +277,11 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } -static void perf_session__release_decomp_events(struct perf_session *session) +static void perf_decomp__release_events(struct decomp *next) { - struct decomp *next, *decomp; + struct decomp *decomp; size_t mmap_len; - next = session->decomp; + do { decomp = next; if (decomp == NULL) @@ -299,7 +300,7 @@ void perf_session__delete(struct perf_session *session) auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - perf_session__release_decomp_events(session); + perf_decomp__release_events(session->decomp_data.decomp); perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->data) { @@ -509,6 +510,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->bpf = perf_event__process_bpf; if (tool->text_poke == NULL) tool->text_poke = perf_event__process_text_poke; + if (tool->aux_output_hw_id == NULL) + tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -1000,6 +1003,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, + [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1556,6 +1560,8 @@ static int machines__deliver_event(struct machines *machines, return tool->bpf(tool, event, sample, machine); case PERF_RECORD_TEXT_POKE: return tool->text_poke(tool, event, sample, machine); + case PERF_RECORD_AUX_OUTPUT_HW_ID: + return tool->aux_output_hw_id(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -2116,8 +2122,8 @@ fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap) static int __perf_session__process_decomp_events(struct perf_session *session) { s64 skip; - u64 size, file_pos = 0; - struct decomp *decomp = session->decomp_last; + u64 size; + struct decomp *decomp = session->active_decomp->decomp_last; if (!decomp) return 0; @@ -2132,7 +2138,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session) size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, file_pos)) < 0) { + (skip = perf_session__process_event(session, event, decomp->file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", decomp->file_pos + decomp->head, event->header.size, event->header.type); return -EINVAL; @@ -2171,35 +2177,55 @@ struct reader { u64 data_offset; reader_cb_t process; bool in_place_update; + char *mmaps[NUM_MMAPS]; + size_t mmap_size; + int mmap_idx; + char *mmap_cur; + u64 file_pos; + u64 file_offset; + u64 head; + struct zstd_data zstd_data; + struct decomp_data decomp_data; }; static int -reader__process_events(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) +reader__init(struct reader *rd, bool *one_mmap) { u64 data_size = rd->data_size; - u64 head, page_offset, file_offset, file_pos, size; - int err = 0, mmap_prot, mmap_flags, map_idx = 0; - size_t mmap_size; - char *buf, *mmaps[NUM_MMAPS]; - union perf_event *event; - s64 skip; - - page_offset = page_size * (rd->data_offset / page_size); - file_offset = page_offset; - head = rd->data_offset - page_offset; - - ui_progress__init_size(prog, data_size, "Processing events..."); + char **mmaps = rd->mmaps; + rd->head = rd->data_offset; data_size += rd->data_offset; - mmap_size = MMAP_SIZE; - if (mmap_size > data_size) { - mmap_size = data_size; - session->one_mmap = true; + rd->mmap_size = MMAP_SIZE; + if (rd->mmap_size > data_size) { + rd->mmap_size = data_size; + if (one_mmap) + *one_mmap = true; } - memset(mmaps, 0, sizeof(mmaps)); + memset(mmaps, 0, sizeof(rd->mmaps)); + + if (zstd_init(&rd->zstd_data, 0)) + return -1; + rd->decomp_data.zstd_decomp = &rd->zstd_data; + + return 0; +} + +static void +reader__release_decomp(struct reader *rd) +{ + perf_decomp__release_events(rd->decomp_data.decomp); + zstd_fini(&rd->zstd_data); +} + +static int +reader__mmap(struct reader *rd, struct perf_session *session) +{ + int mmap_prot, mmap_flags; + char *buf, **mmaps = rd->mmaps; + u64 page_offset; mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2210,47 +2236,63 @@ reader__process_events(struct reader *rd, struct perf_session *session, mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } -remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, - file_offset); + + if (mmaps[rd->mmap_idx]) { + munmap(mmaps[rd->mmap_idx], rd->mmap_size); + mmaps[rd->mmap_idx] = NULL; + } + + page_offset = page_size * (rd->head / page_size); + rd->file_offset += page_offset; + rd->head -= page_offset; + + buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd, + rd->file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); - err = -errno; - goto out; + return -errno; } - mmaps[map_idx] = buf; - map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); - file_pos = file_offset + head; + mmaps[rd->mmap_idx] = rd->mmap_cur = buf; + rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1); + rd->file_pos = rd->file_offset + rd->head; if (session->one_mmap) { session->one_mmap_addr = buf; - session->one_mmap_offset = file_offset; + session->one_mmap_offset = rd->file_offset; } -more: - event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap); + return 0; +} + +enum { + READER_OK, + READER_NODATA, +}; + +static int +reader__read_event(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) +{ + u64 size; + int err = READER_OK; + union perf_event *event; + s64 skip; + + event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur, + session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); - if (!event) { - if (mmaps[map_idx]) { - munmap(mmaps[map_idx], mmap_size); - mmaps[map_idx] = NULL; - } - - page_offset = page_size * (head / page_size); - file_offset += page_offset; - head -= page_offset; - goto remap; - } + if (!event) + return READER_NODATA; size = event->header.size; skip = -EINVAL; if (size < sizeof(struct perf_event_header) || - (skip = rd->process(session, event, file_pos)) < 0) { + (skip = rd->process(session, event, rd->file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n", - file_offset + head, event->header.size, + rd->file_offset + rd->head, event->header.size, event->header.type, strerror(-skip)); err = skip; goto out; @@ -2259,8 +2301,8 @@ more: if (skip) size += skip; - head += size; - file_pos += size; + rd->head += size; + rd->file_pos += size; err = __perf_session__process_decomp_events(session); if (err) @@ -2268,13 +2310,48 @@ more: ui_progress__update(prog, size); +out: + return err; +} + +static inline bool +reader__eof(struct reader *rd) +{ + return (rd->file_pos >= rd->data_size + rd->data_offset); +} + +static int +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) +{ + int err; + + err = reader__init(rd, &session->one_mmap); + if (err) + goto out; + + session->active_decomp = &rd->decomp_data; + +remap: + err = reader__mmap(rd, session); + if (err) + goto out; + +more: + err = reader__read_event(rd, session, prog); + if (err < 0) + goto out; + else if (err == READER_NODATA) + goto remap; + if (session_done()) goto out; - if (file_pos < data_size) + if (!reader__eof(rd)) goto more; out: + session->active_decomp = &session->decomp_data; return err; } @@ -2327,6 +2404,7 @@ out_err: */ ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); + reader__release_decomp(&rd); session->one_mmap = false; return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5d8bd14a0a39..46c854292ad6 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,6 +20,12 @@ struct thread; struct auxtrace; struct itrace_synth_opts; +struct decomp_data { + struct decomp *decomp; + struct decomp *decomp_last; + struct zstd_data *zstd_decomp; +}; + struct perf_session { struct perf_header header; struct machines machines; @@ -39,8 +45,8 @@ struct perf_session { u64 bytes_transferred; u64 bytes_compressed; struct zstd_data zstd_data; - struct decomp *decomp; - struct decomp *decomp_last; + struct decomp_data decomp_data; + struct decomp_data *active_decomp; }; struct decomp { diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 5b7d6c16d33f..af468e3bb6fa 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <inttypes.h> +#include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -15,6 +17,7 @@ #include "srcline.h" #include "string2.h" #include "symbol.h" +#include "subcmd/run-command.h" bool srcline_full_filename; @@ -119,6 +122,8 @@ static struct symbol *new_inline_sym(struct dso *dso, return inline_sym; } +#define MAX_INLINE_NEST 1024 + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -273,8 +278,6 @@ static void addr2line_cleanup(struct a2l_data *a2l) free(a2l); } -#define MAX_INLINE_NEST 1024 - static int inline_list__append_dso_a2l(struct dso *dso, struct inline_node *node, struct symbol *sym) @@ -361,26 +364,14 @@ void dso__free_a2l(struct dso *dso) dso->a2l = NULL; } -static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso, struct symbol *sym) -{ - struct inline_node *node; - - node = zalloc(sizeof(*node)); - if (node == NULL) { - perror("not enough memory for the inline node"); - return NULL; - } - - INIT_LIST_HEAD(&node->val); - node->addr = addr; - - addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); - return node; -} - #else /* HAVE_LIBBFD_SUPPORT */ +struct a2l_subprocess { + struct child_process addr2line; + FILE *to_child; + FILE *from_child; +}; + static int filename_split(char *filename, unsigned int *line_nr) { char *sep; @@ -402,114 +393,285 @@ static int filename_split(char *filename, unsigned int *line_nr) return 0; } -static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line_nr, - struct dso *dso __maybe_unused, - bool unwind_inlines __maybe_unused, - struct inline_node *node __maybe_unused, - struct symbol *sym __maybe_unused) +static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l) { - FILE *fp; - char cmd[PATH_MAX]; - char *filename = NULL; - size_t len; - int ret = 0; + if (a2l->addr2line.pid != -1) { + kill(a2l->addr2line.pid, SIGKILL); + finish_command(&a2l->addr2line); /* ignore result, we don't care */ + a2l->addr2line.pid = -1; + } - scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, - dso_name, addr); + if (a2l->to_child != NULL) { + fclose(a2l->to_child); + a2l->to_child = NULL; + } - fp = popen(cmd, "r"); - if (fp == NULL) { - pr_warning("popen failed for %s\n", dso_name); - return 0; + if (a2l->from_child != NULL) { + fclose(a2l->from_child); + a2l->from_child = NULL; + } + + free(a2l); +} + +static struct a2l_subprocess *addr2line_subprocess_init(const char *path) +{ + const char *argv[] = { "addr2line", "-e", path, "-i", "-f", NULL }; + struct a2l_subprocess *a2l = zalloc(sizeof(*a2l)); + int start_command_status = 0; + + if (a2l == NULL) + goto out; + + a2l->to_child = NULL; + a2l->from_child = NULL; + + a2l->addr2line.pid = -1; + a2l->addr2line.in = -1; + a2l->addr2line.out = -1; + a2l->addr2line.no_stderr = 1; + + a2l->addr2line.argv = argv; + start_command_status = start_command(&a2l->addr2line); + a2l->addr2line.argv = NULL; /* it's not used after start_command; avoid dangling pointers */ + + if (start_command_status != 0) { + pr_warning("could not start addr2line for %s: start_command return code %d\n", + path, + start_command_status); + goto out; } - if (getline(&filename, &len, fp) < 0 || !len) { - pr_warning("addr2line has no output for %s\n", dso_name); + a2l->to_child = fdopen(a2l->addr2line.in, "w"); + if (a2l->to_child == NULL) { + pr_warning("could not open write-stream to addr2line of %s\n", path); goto out; } - ret = filename_split(filename, line_nr); - if (ret != 1) { - free(filename); + a2l->from_child = fdopen(a2l->addr2line.out, "r"); + if (a2l->from_child == NULL) { + pr_warning("could not open read-stream from addr2line of %s\n", path); goto out; } - *file = filename; + return a2l; out: - pclose(fp); - return ret; + if (a2l) + addr2line_subprocess_cleanup(a2l); + + return NULL; } -void dso__free_a2l(struct dso *dso __maybe_unused) +static int read_addr2line_record(struct a2l_subprocess *a2l, + char **function, + char **filename, + unsigned int *line_nr) { + /* + * Returns: + * -1 ==> error + * 0 ==> sentinel (or other ill-formed) record read + * 1 ==> a genuine record read + */ + char *line = NULL; + size_t line_len = 0; + unsigned int dummy_line_nr = 0; + int ret = -1; + + if (function != NULL) + zfree(function); + + if (filename != NULL) + zfree(filename); + + if (line_nr != NULL) + *line_nr = 0; + + if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) + goto error; + + if (function != NULL) + *function = strdup(strim(line)); + + zfree(&line); + line_len = 0; + + if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) + goto error; + + if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0) { + ret = 0; + goto error; + } + + if (filename != NULL) + *filename = strdup(line); + + zfree(&line); + line_len = 0; + + return 1; + +error: + free(line); + if (function != NULL) + zfree(function); + if (filename != NULL) + zfree(filename); + return ret; } -static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso __maybe_unused, - struct symbol *sym) +static int inline_list__append_record(struct dso *dso, + struct inline_node *node, + struct symbol *sym, + const char *function, + const char *filename, + unsigned int line_nr) { - FILE *fp; - char cmd[PATH_MAX]; - struct inline_node *node; - char *filename = NULL; - char *funcname = NULL; - size_t filelen, funclen; - unsigned int line_nr = 0; + struct symbol *inline_sym = new_inline_sym(dso, sym, function); - scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64, - dso_name, addr); + return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node); +} - fp = popen(cmd, "r"); - if (fp == NULL) { - pr_err("popen failed for %s\n", dso_name); - return NULL; +static int addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line_nr, + struct dso *dso, + bool unwind_inlines, + struct inline_node *node, + struct symbol *sym __maybe_unused) +{ + struct a2l_subprocess *a2l = dso->a2l; + char *record_function = NULL; + char *record_filename = NULL; + unsigned int record_line_nr = 0; + int record_status = -1; + int ret = 0; + size_t inline_count = 0; + + if (!a2l) { + dso->a2l = addr2line_subprocess_init(dso_name); + a2l = dso->a2l; } - node = zalloc(sizeof(*node)); - if (node == NULL) { - perror("not enough memory for the inline node"); + if (a2l == NULL) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); goto out; } - INIT_LIST_HEAD(&node->val); - node->addr = addr; - - /* addr2line -f generates two lines for each inlined functions */ - while (getline(&funcname, &funclen, fp) != -1) { - char *srcline; - struct symbol *inline_sym; + /* + * Send our request and then *deliberately* send something that can't be interpreted as + * a valid address to ask addr2line about (namely, ","). This causes addr2line to first + * write out the answer to our request, in an unbounded/unknown number of records, and + * then to write out the lines "??" and "??:0", so that we can detect when it has + * finished giving us anything useful. We have to be careful about the first record, + * though, because it may be genuinely unknown, in which case we'll get two sets of + * "??"/"??:0" lines. + */ + if (fprintf(a2l->to_child, "%016"PRIx64"\n,\n", addr) < 0 || fflush(a2l->to_child) != 0) { + pr_warning("%s %s: could not send request\n", __func__, dso_name); + goto out; + } - strim(funcname); + switch (read_addr2line_record(a2l, &record_function, &record_filename, &record_line_nr)) { + case -1: + pr_warning("%s %s: could not read first record\n", __func__, dso_name); + goto out; + case 0: + /* + * The first record was invalid, so return failure, but first read another + * record, since we asked a junk question and have to clear the answer out. + */ + switch (read_addr2line_record(a2l, NULL, NULL, NULL)) { + case -1: + pr_warning("%s %s: could not read delimiter record\n", __func__, dso_name); + break; + case 0: + /* As expected. */ + break; + default: + pr_warning("%s %s: unexpected record instead of sentinel", + __func__, dso_name); + break; + } + goto out; + default: + break; + } - if (getline(&filename, &filelen, fp) == -1) - goto out; + if (file) { + *file = strdup(record_filename); + ret = 1; + } + if (line_nr) + *line_nr = record_line_nr; - if (filename_split(filename, &line_nr) != 1) + if (unwind_inlines) { + if (node && inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; goto out; + } + } - srcline = srcline_from_fileline(filename, line_nr); - inline_sym = new_inline_sym(dso, sym, funcname); - - if (inline_list__append(inline_sym, srcline, node) != 0) { - free(srcline); - if (inline_sym && inline_sym->inlined) - symbol__delete(inline_sym); - goto out; + /* We have to read the records even if we don't care about the inline info. */ + while ((record_status = read_addr2line_record(a2l, + &record_function, + &record_filename, + &record_line_nr)) == 1) { + if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) { + if (inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; + goto out; + } + ret = 1; /* found at least one inline frame */ } } out: - pclose(fp); - free(filename); - free(funcname); + free(record_function); + free(record_filename); + return ret; +} - return node; +void dso__free_a2l(struct dso *dso) +{ + struct a2l_subprocess *a2l = dso->a2l; + + if (!a2l) + return; + + addr2line_subprocess_cleanup(a2l); + + dso->a2l = NULL; } #endif /* HAVE_LIBBFD_SUPPORT */ +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso, struct symbol *sym) +{ + struct inline_node *node; + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + return NULL; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); + return node; +} + /* * Number of addr2line failures (without success) before disabling it for that * dso. diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 34a7f5c1fff7..e4fb02b05130 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +#include <math.h> #include <stdio.h> #include "evsel.h" #include "stat.h" #include "color.h" +#include "debug.h" #include "pmu.h" #include "rblist.h" #include "evlist.h" @@ -370,12 +372,16 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) { struct evsel *counter, *leader, **metric_events, *oc; bool found; - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; struct hashmap_entry *cur; size_t bkt; int i; - expr__ctx_init(&ctx); + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return; + } evlist__for_each_entry(evsel_list, counter) { bool invalid = false; @@ -383,25 +389,25 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) if (!counter->metric_expr) continue; - expr__ctx_clear(&ctx); + expr__ctx_clear(ctx); metric_events = counter->metric_events; if (!metric_events) { - if (expr__find_other(counter->metric_expr, - counter->name, - &ctx, 1) < 0) + if (expr__find_ids(counter->metric_expr, + counter->name, + ctx) < 0) continue; metric_events = calloc(sizeof(struct evsel *), - hashmap__size(&ctx.ids) + 1); + hashmap__size(ctx->ids) + 1); if (!metric_events) { - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); return; } counter->metric_events = metric_events; } i = 0; - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *metric_name = (const char *)cur->key; found = false; @@ -438,6 +444,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) "Add %s event to groups to get metric expression for %s\n", metric_name, counter->name); + free(printed); printed = strdup(metric_name); } invalid = true; @@ -453,7 +460,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) counter->metric_expr = NULL; } } - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); } static double runtime_stat_avg(struct runtime_stat *st, @@ -815,10 +822,9 @@ static int prepare_metric(struct evsel **metric_events, struct runtime_stat *st) { double scale; - char *n, *pn; + char *n; int i, j, ret; - expr__ctx_init(pctx); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -839,23 +845,11 @@ static int prepare_metric(struct evsel **metric_events, if (v->metric_other) metric_total = v->metric_total; } - - n = strdup(metric_events[i]->name); + n = strdup(evsel__metric_id(metric_events[i])); if (!n) return -ENOMEM; - /* - * This display code with --no-merge adds [cpu] postfixes. - * These are not supported by the parser. Remove everything - * after the space. - */ - pn = strchr(n, ' '); - if (pn) - *pn = 0; - - if (metric_total) - expr__add_id_val(pctx, n, metric_total); - else - expr__add_id_val(pctx, n, avg_stats(stats)*scale); + + expr__add_id_val(pctx, n, metric_total ? : avg_stats(stats) * scale); } for (j = 0; metric_refs && metric_refs[j].metric_name; j++) { @@ -880,17 +874,23 @@ static void generic_metric(struct perf_stat_config *config, struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; double ratio, scale; int i; void *ctxp = out->ctx; - i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st); - if (i < 0) + pctx = expr__ctx_new(); + if (!pctx) return; + pctx->runtime = runtime; + i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); + if (i < 0) { + expr__ctx_free(pctx); + return; + } if (!metric_events[i]) { - if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { + if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; char metric_bf[64]; @@ -926,22 +926,26 @@ static void generic_metric(struct perf_stat_config *config, (metric_name ? metric_name : name) : "", 0); } - expr__ctx_clear(&pctx); + expr__ctx_free(pctx); } double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) { - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; double ratio = 0.0; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) + pctx = expr__ctx_new(); + if (!pctx) + return NAN; + + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) goto out; - if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) + if (expr__parse(&ratio, pctx, mexp->metric_expr)) ratio = 0.0; out: - expr__ctx_clear(&pctx); + expr__ctx_free(pctx); return ratio; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0fc9a5410739..aa1b7c12fd61 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -702,6 +702,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__filter(type)) return 0; + /* Ignore local symbols for ARM modules */ + if (name[0] == '$') + return 0; + /* * module symbols are not sorted so we add all * symbols, setting length to 0, and rely on @@ -2630,3 +2634,25 @@ struct mem_info *mem_info__new(void) refcount_set(&mi->refcnt, 1); return mi; } + +/* + * Checks that user supplied symbol kernel files are accessible because + * the default mechanism for accessing elf files fails silently. i.e. if + * debug syms for a build ID aren't found perf carries on normally. When + * they are user supplied we should assume that the user doesn't want to + * silently fail. + */ +int symbol__validate_sym_arguments(void) +{ + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + return 0; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 954d6a049ee2..166196686f2e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -286,4 +286,6 @@ static inline void __mem_info__zput(struct mem_info **mi) #define mem_info__zput(mi) __mem_info__zput(&mi) +int symbol__validate_sym_arguments(void); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a7e981b2d7de..198982109f0f 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -715,7 +715,8 @@ static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *fork_event, union perf_event *namespaces_event, pid_t pid, int full, perf_event__handler_t process, - struct perf_tool *tool, struct machine *machine, bool mmap_data) + struct perf_tool *tool, struct machine *machine, + bool needs_mmap, bool mmap_data) { char filename[PATH_MAX]; struct dirent **dirent; @@ -739,7 +740,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, * send mmap only for thread group leader * see thread__init_maps() */ - if (pid == tgid && + if (pid == tgid && needs_mmap && perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data)) return -1; @@ -786,7 +787,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, break; rc = 0; - if (_pid == pid && !kernel_thread) { + if (_pid == pid && !kernel_thread && needs_mmap) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); @@ -806,7 +807,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, - bool mmap_data) + bool needs_mmap, bool mmap_data) { union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; @@ -836,7 +837,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, perf_thread_map__pid(threads, thread), 0, process, tool, machine, - mmap_data)) { + needs_mmap, mmap_data)) { err = -1; break; } @@ -862,7 +863,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, - mmap_data)) { + needs_mmap, mmap_data)) { err = -1; break; } @@ -882,6 +883,7 @@ out: static int __perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, + bool needs_mmap, bool mmap_data, struct dirent **dirent, int start, @@ -926,7 +928,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool, */ __event__synthesize_thread(comm_event, mmap_event, fork_event, namespaces_event, pid, 1, process, - tool, machine, mmap_data); + tool, machine, needs_mmap, mmap_data); } err = 0; @@ -945,6 +947,7 @@ struct synthesize_threads_arg { struct perf_tool *tool; perf_event__handler_t process; struct machine *machine; + bool needs_mmap; bool mmap_data; struct dirent **dirent; int num; @@ -956,7 +959,8 @@ static void *synthesize_threads_worker(void *arg) struct synthesize_threads_arg *args = arg; __perf_event__synthesize_threads(args->tool, args->process, - args->machine, args->mmap_data, + args->machine, + args->needs_mmap, args->mmap_data, args->dirent, args->start, args->num); return NULL; @@ -965,7 +969,7 @@ static void *synthesize_threads_worker(void *arg) int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, - bool mmap_data, + bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize) { struct synthesize_threads_arg *args = NULL; @@ -994,7 +998,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr <= 1) { err = __perf_event__synthesize_threads(tool, process, - machine, mmap_data, + machine, + needs_mmap, mmap_data, dirent, base, n); goto free_dirent; } @@ -1015,6 +1020,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, args[i].tool = tool; args[i].process = process; args[i].machine = machine; + args[i].needs_mmap = needs_mmap; args[i].mmap_data = mmap_data; args[i].dirent = dirent; } @@ -1775,26 +1781,27 @@ out_err: int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int nr_threads_synthesize) + perf_event__handler_t process, bool needs_mmap, + bool data_mmap, unsigned int nr_threads_synthesize) { if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); + return perf_event__synthesize_thread_map(tool, threads, process, machine, + needs_mmap, data_mmap); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, - machine, data_mmap, + return perf_event__synthesize_threads(tool, process, machine, + needs_mmap, data_mmap, nr_threads_synthesize); /* command specified */ return 0; } int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, - unsigned int nr_threads_synthesize) + struct perf_thread_map *threads, bool needs_mmap, + bool data_mmap, unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap, - nr_threads_synthesize); + perf_event__process, needs_mmap, + data_mmap, nr_threads_synthesize); } static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id) @@ -2230,3 +2237,31 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, return ret; } + +int parse_synth_opt(char *synth) +{ + char *p, *q; + int ret = 0; + + if (synth == NULL) + return -1; + + for (q = synth; (p = strsep(&q, ",")); p = q) { + if (!strcasecmp(p, "no") || !strcasecmp(p, "none")) + return 0; + + if (!strcasecmp(p, "all")) + return PERF_SYNTH_ALL; + + if (!strcasecmp(p, "task")) + ret |= PERF_SYNTH_TASK; + else if (!strcasecmp(p, "mmap")) + ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP; + else if (!strcasecmp(p, "cgroup")) + ret |= PERF_SYNTH_CGROUP; + else + return -1; + } + + return ret; +} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index c845e2b9b444..c931433bacbf 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -27,6 +27,18 @@ struct target; union perf_event; +enum perf_record_synth { + PERF_SYNTH_TASK = 1 << 0, + PERF_SYNTH_MMAP = 1 << 1, + PERF_SYNTH_CGROUP = 1 << 2, + + /* last element */ + PERF_SYNTH_MAX = 1 << 3, +}; +#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1) + +int parse_synth_opt(char *str); + typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); @@ -53,8 +65,8 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data); -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); @@ -65,10 +77,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, + perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, + struct perf_thread_map *threads, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); #ifdef HAVE_AUXTRACE_SUPPORT diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bbbc0dcd461f..ef873f2cc38f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -53,6 +53,7 @@ struct perf_tool { lost_samples, aux, itrace_start, + aux_output_hw_id, context_switch, throttle, unthrottle, diff --git a/tools/rcu/extract-stall.sh b/tools/rcu/extract-stall.sh new file mode 100644 index 000000000000..e565697c9f90 --- /dev/null +++ b/tools/rcu/extract-stall.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Extract any RCU CPU stall warnings present in specified file. +# Filter out clocksource lines. Note that preceding-lines excludes the +# initial line of the stall warning but trailing-lines includes it. +# +# Usage: extract-stall.sh dmesg-file [ preceding-lines [ trailing-lines ] ] + +echo $1 +preceding_lines="${2-3}" +trailing_lines="${3-10}" + +awk -v preceding_lines="$preceding_lines" -v trailing_lines="$trailing_lines" ' +suffix <= 0 { + for (i = preceding_lines; i > 0; i--) + last[i] = last[i - 1]; + last[0] = $0; +} + +suffix > 0 { + print $0; + suffix--; + if (suffix <= 0) + print ""; +} + +suffix <= 0 && /detected stall/ { + for (i = preceding_lines; i >= 0; i--) + if (last[i] != "") + print last[i]; + suffix = trailing_lines; +}' < "$1" | tr -d '\015' | grep -v clocksource + diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index b10b7a27c33f..0c6c7f456887 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,7 +4,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ + -e s/riscv.*/riscv/) ifndef ARCH ARCH := $(HOSTARCH) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild new file mode 100644 index 000000000000..86deba8308a1 --- /dev/null +++ b/tools/testing/cxl/Kbuild @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 +ldflags-y += --wrap=is_acpi_device_node +ldflags-y += --wrap=acpi_get_table +ldflags-y += --wrap=acpi_put_table +ldflags-y += --wrap=acpi_evaluate_integer +ldflags-y += --wrap=acpi_pci_find_root +ldflags-y += --wrap=pci_walk_bus +ldflags-y += --wrap=nvdimm_bus_register + +DRIVERS := ../../../drivers +CXL_SRC := $(DRIVERS)/cxl +CXL_CORE_SRC := $(DRIVERS)/cxl/core +ccflags-y := -I$(srctree)/drivers/cxl/ +ccflags-y += -D__mock=__weak + +obj-m += cxl_acpi.o + +cxl_acpi-y := $(CXL_SRC)/acpi.o +cxl_acpi-y += mock_acpi.o +cxl_acpi-y += config_check.o + +obj-m += cxl_pmem.o + +cxl_pmem-y := $(CXL_SRC)/pmem.o +cxl_pmem-y += config_check.o + +obj-m += cxl_core.o + +cxl_core-y := $(CXL_CORE_SRC)/bus.o +cxl_core-y += $(CXL_CORE_SRC)/pmem.o +cxl_core-y += $(CXL_CORE_SRC)/regs.o +cxl_core-y += $(CXL_CORE_SRC)/memdev.o +cxl_core-y += $(CXL_CORE_SRC)/mbox.o +cxl_core-y += config_check.o + +cxl_core-y += mock_pmem.o + +obj-m += test/ diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c new file mode 100644 index 000000000000..de5e5b3652fd --- /dev/null +++ b/tools/testing/cxl/config_check.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> + +void check(void) +{ + /* + * These kconfig symbols must be set to "m" for cxl_test to load + * and operate. + */ + BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_BUS)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_ACPI)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM)); +} diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c new file mode 100644 index 000000000000..4c8a493ace56 --- /dev/null +++ b/tools/testing/cxl/mock_acpi.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ + +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/acpi.h> +#include <linux/pci.h> +#include <cxl.h> +#include "test/mock.h" + +struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) +{ + int index; + struct acpi_device *adev, *found = NULL; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_bridge(dev)) { + found = ACPI_COMPANION(dev); + goto out; + } + + if (dev->bus == &platform_bus_type) + goto out; + + adev = to_acpi_device(dev); + if (!acpi_pci_find_root(adev->handle)) + goto out; + + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0) { + found = adev; + dev_dbg(host, "found host bridge %s\n", dev_name(&adev->dev)); + } +out: + put_cxl_mock_ops(index); + return found; +} + +static int match_add_root_port(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + struct pci_bus *root_bus = ctx->root; + struct cxl_port *port = ctx->port; + int type = pci_pcie_type(pdev); + struct device *dev = ctx->dev; + u32 lnkcap, port_num; + int rc; + + if (pdev->bus != root_bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != PCI_EXP_TYPE_ROOT_PORT) + return 0; + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap) != PCIBIOS_SUCCESSFUL) + return 0; + + /* TODO walk DVSEC to find component register base */ + port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); + rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); + if (rc) { + dev_err(dev, "failed to add dport: %s (%d)\n", + dev_name(&pdev->dev), rc); + ctx->error = rc; + return rc; + } + ctx->count++; + + dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); + + return 0; +} + +static int mock_add_root_port(struct platform_device *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + struct cxl_port *port = ctx->port; + struct device *dev = ctx->dev; + int rc; + + rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); + if (rc) { + dev_err(dev, "failed to add dport: %s (%d)\n", + dev_name(&pdev->dev), rc); + ctx->error = rc; + return rc; + } + ctx->count++; + + dev_dbg(dev, "add dport%d: %s\n", pdev->id, dev_name(&pdev->dev)); + + return 0; +} + +int match_add_root_ports(struct pci_dev *dev, void *data) +{ + int index, rc; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct platform_device *pdev = (struct platform_device *) dev; + + if (ops && ops->is_mock_port(pdev)) + rc = mock_add_root_port(pdev, data); + else + rc = match_add_root_port(dev, data); + + put_cxl_mock_ops(index); + + return rc; +} diff --git a/tools/testing/cxl/mock_pmem.c b/tools/testing/cxl/mock_pmem.c new file mode 100644 index 000000000000..f7315e6f52c0 --- /dev/null +++ b/tools/testing/cxl/mock_pmem.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include <cxl.h> +#include "test/mock.h" +#include <core/core.h> + +int match_nvdimm_bridge(struct device *dev, const void *data) +{ + int index, rc = 0; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + const struct cxl_nvdimm *cxl_nvd = data; + + if (ops) { + if (dev->type == &cxl_nvdimm_bridge_type && + (ops->is_mock_dev(dev->parent->parent) == + ops->is_mock_dev(cxl_nvd->dev.parent->parent))) + rc = 1; + } else + rc = dev->type == &cxl_nvdimm_bridge_type; + + put_cxl_mock_ops(index); + + return rc; +} diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild new file mode 100644 index 000000000000..4e59e2c911f6 --- /dev/null +++ b/tools/testing/cxl/test/Kbuild @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := -I$(srctree)/drivers/cxl/ + +obj-m += cxl_test.o +obj-m += cxl_mock.o +obj-m += cxl_mock_mem.o + +cxl_test-y := cxl.o +cxl_mock-y := mock.o +cxl_mock_mem-y := mem.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c new file mode 100644 index 000000000000..cb32f9e27d5d --- /dev/null +++ b/tools/testing/cxl/test/cxl.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright(c) 2021 Intel Corporation. All rights reserved. + +#include <linux/platform_device.h> +#include <linux/genalloc.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/acpi.h> +#include <linux/pci.h> +#include <linux/mm.h> +#include "mock.h" + +#define NR_CXL_HOST_BRIDGES 4 +#define NR_CXL_ROOT_PORTS 2 + +static struct platform_device *cxl_acpi; +static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; +static struct platform_device + *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +struct platform_device *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; + +static struct acpi_device acpi0017_mock; +static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { + [0] = { + .handle = &host_bridge[0], + }, + [1] = { + .handle = &host_bridge[1], + }, + [2] = { + .handle = &host_bridge[2], + }, + [3] = { + .handle = &host_bridge[3], + }, +}; + +static bool is_mock_dev(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) + if (dev == &cxl_mem[i]->dev) + return true; + if (dev == &cxl_acpi->dev) + return true; + return false; +} + +static bool is_mock_adev(struct acpi_device *adev) +{ + int i; + + if (adev == &acpi0017_mock) + return true; + + for (i = 0; i < ARRAY_SIZE(host_bridge); i++) + if (adev == &host_bridge[i]) + return true; + + return false; +} + +static struct { + struct acpi_table_cedt cedt; + struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES]; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws0; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[4]; + } cfmws1; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws2; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[4]; + } cfmws3; +} __packed mock_cedt = { + .cedt = { + .header = { + .signature = "CEDT", + .length = sizeof(mock_cedt), + .revision = 1, + }, + }, + .chbs[0] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 0, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, + .chbs[1] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 1, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, + .chbs[2] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 2, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, + .chbs[3] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 3, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, + .cfmws0 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws0), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, + .qtg_id = 0, + .window_size = SZ_256M, + }, + .target = { 0 }, + }, + .cfmws1 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws1), + }, + .interleave_ways = 2, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, + .qtg_id = 1, + .window_size = SZ_256M * 4, + }, + .target = { 0, 1, 2, 3 }, + }, + .cfmws2 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws2), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 2, + .window_size = SZ_256M, + }, + .target = { 0 }, + }, + .cfmws3 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws3), + }, + .interleave_ways = 2, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 3, + .window_size = SZ_256M * 4, + }, + .target = { 0, 1, 2, 3 }, + }, +}; + +struct cxl_mock_res { + struct list_head list; + struct range range; +}; + +static LIST_HEAD(mock_res); +static DEFINE_MUTEX(mock_res_lock); +static struct gen_pool *cxl_mock_pool; + +static void depopulate_all_mock_resources(void) +{ + struct cxl_mock_res *res, *_res; + + mutex_lock(&mock_res_lock); + list_for_each_entry_safe(res, _res, &mock_res, list) { + gen_pool_free(cxl_mock_pool, res->range.start, + range_len(&res->range)); + list_del(&res->list); + kfree(res); + } + mutex_unlock(&mock_res_lock); +} + +static struct cxl_mock_res *alloc_mock_res(resource_size_t size) +{ + struct cxl_mock_res *res = kzalloc(sizeof(*res), GFP_KERNEL); + struct genpool_data_align data = { + .align = SZ_256M, + }; + unsigned long phys; + + INIT_LIST_HEAD(&res->list); + phys = gen_pool_alloc_algo(cxl_mock_pool, size, + gen_pool_first_fit_align, &data); + if (!phys) + return NULL; + + res->range = (struct range) { + .start = phys, + .end = phys + size - 1, + }; + mutex_lock(&mock_res_lock); + list_add(&res->list, &mock_res); + mutex_unlock(&mock_res_lock); + + return res; +} + +static int populate_cedt(void) +{ + struct acpi_cedt_cfmws *cfmws[4] = { + [0] = &mock_cedt.cfmws0.cfmws, + [1] = &mock_cedt.cfmws1.cfmws, + [2] = &mock_cedt.cfmws2.cfmws, + [3] = &mock_cedt.cfmws3.cfmws, + }; + struct cxl_mock_res *res; + int i; + + for (i = 0; i < ARRAY_SIZE(mock_cedt.chbs); i++) { + struct acpi_cedt_chbs *chbs = &mock_cedt.chbs[i]; + resource_size_t size; + + if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL20) + size = ACPI_CEDT_CHBS_LENGTH_CXL20; + else + size = ACPI_CEDT_CHBS_LENGTH_CXL11; + + res = alloc_mock_res(size); + if (!res) + return -ENOMEM; + chbs->base = res->range.start; + chbs->length = size; + } + + for (i = 0; i < ARRAY_SIZE(cfmws); i++) { + struct acpi_cedt_cfmws *window = cfmws[i]; + + res = alloc_mock_res(window->window_size); + if (!res) + return -ENOMEM; + window->base_hpa = res->range.start; + } + + return 0; +} + +static acpi_status mock_acpi_get_table(char *signature, u32 instance, + struct acpi_table_header **out_table) +{ + if (instance < U32_MAX || strcmp(signature, ACPI_SIG_CEDT) != 0) + return acpi_get_table(signature, instance, out_table); + + *out_table = (struct acpi_table_header *) &mock_cedt; + return AE_OK; +} + +static void mock_acpi_put_table(struct acpi_table_header *table) +{ + if (table == (struct acpi_table_header *) &mock_cedt) + return; + acpi_put_table(table); +} + +static bool is_mock_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) + if (dev == &cxl_host_bridge[i]->dev) + return true; + + return false; +} + +static int host_bridge_index(struct acpi_device *adev) +{ + return adev - host_bridge; +} + +static struct acpi_device *find_host_bridge(acpi_handle handle) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(host_bridge); i++) + if (handle == host_bridge[i].handle) + return &host_bridge[i]; + return NULL; +} + +static acpi_status +mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, + struct acpi_object_list *arguments, + unsigned long long *data) +{ + struct acpi_device *adev = find_host_bridge(handle); + + if (!adev || strcmp(pathname, METHOD_NAME__UID) != 0) + return acpi_evaluate_integer(handle, pathname, arguments, data); + + *data = host_bridge_index(adev); + return AE_OK; +} + +static struct pci_bus mock_pci_bus[NR_CXL_HOST_BRIDGES]; +static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { + [0] = { + .bus = &mock_pci_bus[0], + }, + [1] = { + .bus = &mock_pci_bus[1], + }, + [2] = { + .bus = &mock_pci_bus[2], + }, + [3] = { + .bus = &mock_pci_bus[3], + }, +}; + +static struct platform_device *mock_cxl_root_port(struct pci_bus *bus, int index) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++) + if (bus == &mock_pci_bus[i]) + return cxl_root_port[index + i * NR_CXL_ROOT_PORTS]; + return NULL; +} + +static bool is_mock_port(struct platform_device *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) + if (pdev == cxl_root_port[i]) + return true; + return false; +} + +static bool is_mock_bus(struct pci_bus *bus) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++) + if (bus == &mock_pci_bus[i]) + return true; + return false; +} + +static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle) +{ + struct acpi_device *adev = find_host_bridge(handle); + + if (!adev) + return acpi_pci_find_root(handle); + return &mock_pci_root[host_bridge_index(adev)]; +} + +static struct cxl_mock_ops cxl_mock_ops = { + .is_mock_adev = is_mock_adev, + .is_mock_bridge = is_mock_bridge, + .is_mock_bus = is_mock_bus, + .is_mock_port = is_mock_port, + .is_mock_dev = is_mock_dev, + .mock_port = mock_cxl_root_port, + .acpi_get_table = mock_acpi_get_table, + .acpi_put_table = mock_acpi_put_table, + .acpi_evaluate_integer = mock_acpi_evaluate_integer, + .acpi_pci_find_root = mock_acpi_pci_find_root, + .list = LIST_HEAD_INIT(cxl_mock_ops.list), +}; + +static void mock_companion(struct acpi_device *adev, struct device *dev) +{ + device_initialize(&adev->dev); + fwnode_init(&adev->fwnode, NULL); + dev->fwnode = &adev->fwnode; + adev->fwnode.dev = dev; +} + +#ifndef SZ_64G +#define SZ_64G (SZ_32G * 2) +#endif + +#ifndef SZ_512G +#define SZ_512G (SZ_64G * 8) +#endif + +static struct platform_device *alloc_memdev(int id) +{ + struct resource res[] = { + [0] = { + .flags = IORESOURCE_MEM, + }, + [1] = { + .flags = IORESOURCE_MEM, + .desc = IORES_DESC_PERSISTENT_MEMORY, + }, + }; + struct platform_device *pdev; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(res); i++) { + struct cxl_mock_res *r = alloc_mock_res(SZ_256M); + + if (!r) + return NULL; + res[i].start = r->range.start; + res[i].end = r->range.end; + } + + pdev = platform_device_alloc("cxl_mem", id); + if (!pdev) + return NULL; + + rc = platform_device_add_resources(pdev, res, ARRAY_SIZE(res)); + if (rc) + goto err; + + return pdev; + +err: + platform_device_put(pdev); + return NULL; +} + +static __init int cxl_test_init(void) +{ + int rc, i; + + register_cxl_mock_ops(&cxl_mock_ops); + + cxl_mock_pool = gen_pool_create(ilog2(SZ_2M), NUMA_NO_NODE); + if (!cxl_mock_pool) { + rc = -ENOMEM; + goto err_gen_pool_create; + } + + rc = gen_pool_add(cxl_mock_pool, SZ_512G, SZ_64G, NUMA_NO_NODE); + if (rc) + goto err_gen_pool_add; + + rc = populate_cedt(); + if (rc) + goto err_populate; + + for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) { + struct acpi_device *adev = &host_bridge[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", i); + if (!pdev) + goto err_bridge; + + mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + cxl_host_bridge[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { + struct platform_device *bridge = + cxl_host_bridge[i / NR_CXL_ROOT_PORTS]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_root_port", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &bridge->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_port; + } + cxl_root_port[i] = pdev; + } + + BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_root_port)); + for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { + struct platform_device *port = cxl_root_port[i]; + struct platform_device *pdev; + + pdev = alloc_memdev(i); + if (!pdev) + goto err_mem; + pdev->dev.parent = &port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_mem[i] = pdev; + } + + cxl_acpi = platform_device_alloc("cxl_acpi", 0); + if (!cxl_acpi) + goto err_mem; + + mock_companion(&acpi0017_mock, &cxl_acpi->dev); + acpi0017_mock.dev.bus = &platform_bus_type; + + rc = platform_device_add(cxl_acpi); + if (rc) + goto err_add; + + return 0; + +err_add: + platform_device_put(cxl_acpi); +err_mem: + for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem[i]); +err_port: + for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_port[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + platform_device_unregister(cxl_host_bridge[i]); +err_populate: + depopulate_all_mock_resources(); +err_gen_pool_add: + gen_pool_destroy(cxl_mock_pool); +err_gen_pool_create: + unregister_cxl_mock_ops(&cxl_mock_ops); + return rc; +} + +static __exit void cxl_test_exit(void) +{ + int i; + + platform_device_unregister(cxl_acpi); + for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem[i]); + for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_port[i]); + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + platform_device_unregister(cxl_host_bridge[i]); + depopulate_all_mock_resources(); + gen_pool_destroy(cxl_mock_pool); + unregister_cxl_mock_ops(&cxl_mock_ops); +} + +module_init(cxl_test_init); +module_exit(cxl_test_exit); +MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c new file mode 100644 index 000000000000..12a8437a9ca0 --- /dev/null +++ b/tools/testing/cxl/test/mem.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright(c) 2021 Intel Corporation. All rights reserved. + +#include <linux/platform_device.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/sizes.h> +#include <linux/bits.h> +#include <cxlmem.h> + +#define LSA_SIZE SZ_128K +#define EFFECT(x) (1U << x) + +static struct cxl_cel_entry mock_cel[] = { + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS), + .effect = cpu_to_le16(0), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY), + .effect = cpu_to_le16(0), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_LSA), + .effect = cpu_to_le16(0), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA), + .effect = cpu_to_le16(EFFECT(1) | EFFECT(2)), + }, +}; + +static struct { + struct cxl_mbox_get_supported_logs gsl; + struct cxl_gsl_entry entry; +} mock_gsl_payload = { + .gsl = { + .entries = cpu_to_le16(1), + }, + .entry = { + .uuid = DEFINE_CXL_CEL_UUID, + .size = cpu_to_le32(sizeof(mock_cel)), + }, +}; + +static int mock_gsl(struct cxl_mbox_cmd *cmd) +{ + if (cmd->size_out < sizeof(mock_gsl_payload)) + return -EINVAL; + + memcpy(cmd->payload_out, &mock_gsl_payload, sizeof(mock_gsl_payload)); + cmd->size_out = sizeof(mock_gsl_payload); + + return 0; +} + +static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_log *gl = cmd->payload_in; + u32 offset = le32_to_cpu(gl->offset); + u32 length = le32_to_cpu(gl->length); + uuid_t uuid = DEFINE_CXL_CEL_UUID; + void *data = &mock_cel; + + if (cmd->size_in < sizeof(*gl)) + return -EINVAL; + if (length > cxlm->payload_size) + return -EINVAL; + if (offset + length > sizeof(mock_cel)) + return -EINVAL; + if (!uuid_equal(&gl->uuid, &uuid)) + return -EINVAL; + if (length > cmd->size_out) + return -EINVAL; + + memcpy(cmd->payload_out, data + offset, length); + + return 0; +} + +static int mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +{ + struct platform_device *pdev = to_platform_device(cxlm->dev); + struct cxl_mbox_identify id = { + .fw_revision = { "mock fw v1 " }, + .lsa_size = cpu_to_le32(LSA_SIZE), + /* FIXME: Add partition support */ + .partition_align = cpu_to_le64(0), + }; + u64 capacity = 0; + int i; + + if (cmd->size_out < sizeof(id)) + return -EINVAL; + + for (i = 0; i < 2; i++) { + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, i); + if (!res) + break; + + capacity += resource_size(res) / CXL_CAPACITY_MULTIPLIER; + + if (le64_to_cpu(id.partition_align)) + continue; + + if (res->desc == IORES_DESC_PERSISTENT_MEMORY) + id.persistent_capacity = cpu_to_le64( + resource_size(res) / CXL_CAPACITY_MULTIPLIER); + else + id.volatile_capacity = cpu_to_le64( + resource_size(res) / CXL_CAPACITY_MULTIPLIER); + } + + id.total_capacity = cpu_to_le64(capacity); + + memcpy(cmd->payload_out, &id, sizeof(id)); + + return 0; +} + +static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; + void *lsa = dev_get_drvdata(cxlm->dev); + u32 offset, length; + + if (sizeof(*get_lsa) > cmd->size_in) + return -EINVAL; + offset = le32_to_cpu(get_lsa->offset); + length = le32_to_cpu(get_lsa->length); + if (offset + length > LSA_SIZE) + return -EINVAL; + if (length > cmd->size_out) + return -EINVAL; + + memcpy(cmd->payload_out, lsa + offset, length); + return 0; +} + +static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; + void *lsa = dev_get_drvdata(cxlm->dev); + u32 offset, length; + + if (sizeof(*set_lsa) > cmd->size_in) + return -EINVAL; + offset = le32_to_cpu(set_lsa->offset); + length = cmd->size_in - sizeof(*set_lsa); + if (offset + length > LSA_SIZE) + return -EINVAL; + + memcpy(lsa + offset, &set_lsa->data[0], length); + return 0; +} + +static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +{ + struct device *dev = cxlm->dev; + int rc = -EIO; + + switch (cmd->opcode) { + case CXL_MBOX_OP_GET_SUPPORTED_LOGS: + rc = mock_gsl(cmd); + break; + case CXL_MBOX_OP_GET_LOG: + rc = mock_get_log(cxlm, cmd); + break; + case CXL_MBOX_OP_IDENTIFY: + rc = mock_id(cxlm, cmd); + break; + case CXL_MBOX_OP_GET_LSA: + rc = mock_get_lsa(cxlm, cmd); + break; + case CXL_MBOX_OP_SET_LSA: + rc = mock_set_lsa(cxlm, cmd); + break; + default: + break; + } + + dev_dbg(dev, "opcode: %#x sz_in: %zd sz_out: %zd rc: %d\n", cmd->opcode, + cmd->size_in, cmd->size_out, rc); + + return rc; +} + +static void label_area_release(void *lsa) +{ + vfree(lsa); +} + +static int cxl_mock_mem_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct cxl_memdev *cxlmd; + struct cxl_mem *cxlm; + void *lsa; + int rc; + + lsa = vmalloc(LSA_SIZE); + if (!lsa) + return -ENOMEM; + rc = devm_add_action_or_reset(dev, label_area_release, lsa); + if (rc) + return rc; + dev_set_drvdata(dev, lsa); + + cxlm = cxl_mem_create(dev); + if (IS_ERR(cxlm)) + return PTR_ERR(cxlm); + + cxlm->mbox_send = cxl_mock_mbox_send; + cxlm->payload_size = SZ_4K; + + rc = cxl_mem_enumerate_cmds(cxlm); + if (rc) + return rc; + + rc = cxl_mem_identify(cxlm); + if (rc) + return rc; + + rc = cxl_mem_create_range_info(cxlm); + if (rc) + return rc; + + cxlmd = devm_cxl_add_memdev(cxlm); + if (IS_ERR(cxlmd)) + return PTR_ERR(cxlmd); + + if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) + rc = devm_cxl_add_nvdimm(dev, cxlmd); + + return 0; +} + +static const struct platform_device_id cxl_mock_mem_ids[] = { + { .name = "cxl_mem", }, + { }, +}; +MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids); + +static struct platform_driver cxl_mock_mem_driver = { + .probe = cxl_mock_mem_probe, + .id_table = cxl_mock_mem_ids, + .driver = { + .name = KBUILD_MODNAME, + }, +}; + +module_platform_driver(cxl_mock_mem_driver); +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c new file mode 100644 index 000000000000..b8c108abcf07 --- /dev/null +++ b/tools/testing/cxl/test/mock.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only +//Copyright(c) 2021 Intel Corporation. All rights reserved. + +#include <linux/libnvdimm.h> +#include <linux/rculist.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/acpi.h> +#include <linux/pci.h> +#include "mock.h" + +static LIST_HEAD(mock); + +void register_cxl_mock_ops(struct cxl_mock_ops *ops) +{ + list_add_rcu(&ops->list, &mock); +} +EXPORT_SYMBOL_GPL(register_cxl_mock_ops); + +static DEFINE_SRCU(cxl_mock_srcu); + +void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) +{ + list_del_rcu(&ops->list); + synchronize_srcu(&cxl_mock_srcu); +} +EXPORT_SYMBOL_GPL(unregister_cxl_mock_ops); + +struct cxl_mock_ops *get_cxl_mock_ops(int *index) +{ + *index = srcu_read_lock(&cxl_mock_srcu); + return list_first_or_null_rcu(&mock, struct cxl_mock_ops, list); +} +EXPORT_SYMBOL_GPL(get_cxl_mock_ops); + +void put_cxl_mock_ops(int index) +{ + srcu_read_unlock(&cxl_mock_srcu, index); +} +EXPORT_SYMBOL_GPL(put_cxl_mock_ops); + +bool __wrap_is_acpi_device_node(const struct fwnode_handle *fwnode) +{ + struct acpi_device *adev = + container_of(fwnode, struct acpi_device, fwnode); + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + bool retval = false; + + if (ops) + retval = ops->is_mock_adev(adev); + + if (!retval) + retval = is_acpi_device_node(fwnode); + + put_cxl_mock_ops(index); + return retval; +} +EXPORT_SYMBOL(__wrap_is_acpi_device_node); + +acpi_status __wrap_acpi_get_table(char *signature, u32 instance, + struct acpi_table_header **out_table) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + acpi_status status; + + if (ops) + status = ops->acpi_get_table(signature, instance, out_table); + else + status = acpi_get_table(signature, instance, out_table); + + put_cxl_mock_ops(index); + + return status; +} +EXPORT_SYMBOL(__wrap_acpi_get_table); + +void __wrap_acpi_put_table(struct acpi_table_header *table) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + ops->acpi_put_table(table); + else + acpi_put_table(table); + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL(__wrap_acpi_put_table); + +acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, + acpi_string pathname, + struct acpi_object_list *arguments, + unsigned long long *data) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + acpi_status status; + + if (ops) + status = ops->acpi_evaluate_integer(handle, pathname, arguments, + data); + else + status = acpi_evaluate_integer(handle, pathname, arguments, + data); + put_cxl_mock_ops(index); + + return status; +} +EXPORT_SYMBOL(__wrap_acpi_evaluate_integer); + +struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle) +{ + int index; + struct acpi_pci_root *root; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + root = ops->acpi_pci_find_root(handle); + else + root = acpi_pci_find_root(handle); + + put_cxl_mock_ops(index); + + return root; +} +EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root); + +void __wrap_pci_walk_bus(struct pci_bus *bus, + int (*cb)(struct pci_dev *, void *), void *userdata) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_bus(bus)) { + int rc, i; + + /* + * Simulate 2 root ports per host-bridge and no + * depth recursion. + */ + for (i = 0; i < 2; i++) { + rc = cb((struct pci_dev *) ops->mock_port(bus, i), + userdata); + if (rc) + break; + } + } else + pci_walk_bus(bus, cb, userdata); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_GPL(__wrap_pci_walk_bus); + +struct nvdimm_bus * +__wrap_nvdimm_bus_register(struct device *dev, + struct nvdimm_bus_descriptor *nd_desc) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(dev->parent->parent)) + nd_desc->provider_name = "cxl_test"; + put_cxl_mock_ops(index); + + return nvdimm_bus_register(dev, nd_desc); +} +EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); + +MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h new file mode 100644 index 000000000000..805a94cb3fbe --- /dev/null +++ b/tools/testing/cxl/test/mock.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/list.h> +#include <linux/acpi.h> + +struct cxl_mock_ops { + struct list_head list; + bool (*is_mock_adev)(struct acpi_device *dev); + acpi_status (*acpi_get_table)(char *signature, u32 instance, + struct acpi_table_header **out_table); + void (*acpi_put_table)(struct acpi_table_header *table); + bool (*is_mock_bridge)(struct device *dev); + acpi_status (*acpi_evaluate_integer)(acpi_handle handle, + acpi_string pathname, + struct acpi_object_list *arguments, + unsigned long long *data); + struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); + struct platform_device *(*mock_port)(struct pci_bus *bus, int index); + bool (*is_mock_bus)(struct pci_bus *bus); + bool (*is_mock_port)(struct platform_device *pdev); + bool (*is_mock_dev)(struct device *dev); +}; + +void register_cxl_mock_ops(struct cxl_mock_ops *ops); +void unregister_cxl_mock_ops(struct cxl_mock_ops *ops); +struct cxl_mock_ops *get_cxl_mock_ops(int *index); +void put_cxl_mock_ops(int index); diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 5a931456e718..68e6f461c758 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -8,17 +8,17 @@ # Author: Brendan Higgins <brendanhiggins@google.com> import argparse -import sys import os +import re +import sys import time assert sys.version_info >= (3, 7), "Python version is too old" from collections import namedtuple from enum import Enum, auto -from typing import Iterable +from typing import Iterable, Sequence, List -import kunit_config import kunit_json import kunit_kernel import kunit_parser @@ -31,13 +31,13 @@ KunitBuildRequest = namedtuple('KunitBuildRequest', ['jobs', 'build_dir', 'alltests', 'make_options']) KunitExecRequest = namedtuple('KunitExecRequest', - ['timeout', 'build_dir', 'alltests', - 'filter_glob', 'kernel_args']) + ['timeout', 'build_dir', 'alltests', + 'filter_glob', 'kernel_args', 'run_isolated']) KunitParseRequest = namedtuple('KunitParseRequest', - ['raw_output', 'input_data', 'build_dir', 'json']) + ['raw_output', 'build_dir', 'json']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'alltests', 'filter_glob', - 'kernel_args', 'json', 'make_options']) + 'kernel_args', 'run_isolated', 'json', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -91,31 +91,93 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree, 'built kernel successfully', build_end - build_start) -def exec_tests(linux: kunit_kernel.LinuxSourceTree, - request: KunitExecRequest) -> KunitResult: - kunit_parser.print_with_timestamp('Starting KUnit Kernel ...') - test_start = time.time() - result = linux.run_kernel( - args=request.kernel_args, - timeout=None if request.alltests else request.timeout, - filter_glob=request.filter_glob, - build_dir=request.build_dir) - - test_end = time.time() - - return KunitResult(KunitStatus.SUCCESS, - result, - test_end - test_start) +def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]: + args = ['kunit.action=list'] + if request.kernel_args: + args.extend(request.kernel_args) + + output = linux.run_kernel(args=args, + timeout=None if request.alltests else request.timeout, + filter_glob=request.filter_glob, + build_dir=request.build_dir) + lines = kunit_parser.extract_tap_lines(output) + # Hack! Drop the dummy TAP version header that the executor prints out. + lines.pop() + + # Filter out any extraneous non-test output that might have gotten mixed in. + return [l for l in lines if re.match('^[^\s.]+\.[^\s.]+$', l)] + +def _suites_from_test_list(tests: List[str]) -> List[str]: + """Extracts all the suites from an ordered list of tests.""" + suites = [] # type: List[str] + for t in tests: + parts = t.split('.', maxsplit=2) + if len(parts) != 2: + raise ValueError(f'internal KUnit error, test name should be of the form "<suite>.<test>", got "{t}"') + suite, case = parts + if not suites or suites[-1] != suite: + suites.append(suite) + return suites + + + +def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest, + parse_request: KunitParseRequest) -> KunitResult: + filter_globs = [request.filter_glob] + if request.run_isolated: + tests = _list_tests(linux, request) + if request.run_isolated == 'test': + filter_globs = tests + if request.run_isolated == 'suite': + filter_globs = _suites_from_test_list(tests) + # Apply the test-part of the user's glob, if present. + if '.' in request.filter_glob: + test_glob = request.filter_glob.split('.', maxsplit=2)[1] + filter_globs = [g + '.'+ test_glob for g in filter_globs] + + test_counts = kunit_parser.TestCounts() + exec_time = 0.0 + for i, filter_glob in enumerate(filter_globs): + kunit_parser.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs))) + + test_start = time.time() + run_result = linux.run_kernel( + args=request.kernel_args, + timeout=None if request.alltests else request.timeout, + filter_glob=filter_glob, + build_dir=request.build_dir) + + result = parse_tests(parse_request, run_result) + # run_kernel() doesn't block on the kernel exiting. + # That only happens after we get the last line of output from `run_result`. + # So exec_time here actually contains parsing + execution time, which is fine. + test_end = time.time() + exec_time += test_end - test_start + + test_counts.add_subtest_counts(result.result.test.counts) + + kunit_status = _map_to_overall_status(test_counts.get_status()) + return KunitResult(status=kunit_status, result=result.result, elapsed_time=exec_time) + +def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: + if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED): + return KunitStatus.SUCCESS + else: + return KunitStatus.TEST_FAILURE -def parse_tests(request: KunitParseRequest) -> KunitResult: +def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitResult: parse_start = time.time() test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, - [], + kunit_parser.Test(), 'Tests not Parsed.') if request.raw_output: - output: Iterable[str] = request.input_data + # Treat unparsed results as one passing test. + test_result.test.status = kunit_parser.TestStatus.SUCCESS + test_result.test.counts.passed = 1 + + output: Iterable[str] = input_data if request.raw_output == 'all': pass elif request.raw_output == 'kunit': @@ -126,7 +188,7 @@ def parse_tests(request: KunitParseRequest) -> KunitResult: print(line.rstrip()) else: - test_result = kunit_parser.parse_run_tests(request.input_data) + test_result = kunit_parser.parse_run_tests(input_data) parse_end = time.time() if request.json: @@ -164,16 +226,12 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, exec_request = KunitExecRequest(request.timeout, request.build_dir, request.alltests, request.filter_glob, - request.kernel_args) - exec_result = exec_tests(linux, exec_request) - if exec_result.status != KunitStatus.SUCCESS: - return exec_result - + request.kernel_args, request.run_isolated) parse_request = KunitParseRequest(request.raw_output, - exec_result.result, request.build_dir, request.json) - parse_result = parse_tests(parse_request) + + exec_result = exec_tests(linux, exec_request, parse_request) run_end = time.time() @@ -184,7 +242,27 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, config_result.elapsed_time, build_result.elapsed_time, exec_result.elapsed_time)) - return parse_result + return exec_result + +# Problem: +# $ kunit.py run --json +# works as one would expect and prints the parsed test results as JSON. +# $ kunit.py run --json suite_name +# would *not* pass suite_name as the filter_glob and print as json. +# argparse will consider it to be another way of writing +# $ kunit.py run --json=suite_name +# i.e. it would run all tests, and dump the json to a `suite_name` file. +# So we hackily automatically rewrite --json => --json=stdout +pseudo_bool_flag_defaults = { + '--json': 'stdout', + '--raw_output': 'kunit', +} +def massage_argv(argv: Sequence[str]) -> Sequence[str]: + def massage_arg(arg: str) -> str: + if arg not in pseudo_bool_flag_defaults: + return arg + return f'{arg}={pseudo_bool_flag_defaults[arg]}' + return list(map(massage_arg, argv)) def add_common_opts(parser) -> None: parser.add_argument('--build_dir', @@ -243,9 +321,8 @@ def add_exec_opts(parser) -> None: default=300, metavar='timeout') parser.add_argument('filter_glob', - help='maximum number of seconds to allow for all tests ' - 'to run. This does not include time taken to build the ' - 'tests.', + help='Filter which KUnit test suites/tests run at ' + 'boot-time, e.g. list* or list*.*del_test', type=str, nargs='?', default='', @@ -253,6 +330,12 @@ def add_exec_opts(parser) -> None: parser.add_argument('--kernel_args', help='Kernel command-line parameters. Maybe be repeated', action='append') + parser.add_argument('--run_isolated', help='If set, boot the kernel for each ' + 'individual suite/test. This is can be useful for debugging ' + 'a non-hermetic test, one that might pass/fail based on ' + 'what ran before it.', + type=str, + choices=['suite', 'test']), def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='If set don\'t format output from kernel. ' @@ -303,7 +386,7 @@ def main(argv, linux=None): help='Specifies the file to read results from.', type=str, nargs='?', metavar='input_file') - cli_args = parser.parse_args(argv) + cli_args = parser.parse_args(massage_argv(argv)) if get_kernel_root_path(): os.chdir(get_kernel_root_path()) @@ -326,6 +409,7 @@ def main(argv, linux=None): cli_args.alltests, cli_args.filter_glob, cli_args.kernel_args, + cli_args.run_isolated, cli_args.json, cli_args.make_options) result = run_tests(linux, request) @@ -381,29 +465,27 @@ def main(argv, linux=None): cli_args.build_dir, cli_args.alltests, cli_args.filter_glob, - cli_args.kernel_args) - exec_result = exec_tests(linux, exec_request) + cli_args.kernel_args, + cli_args.run_isolated) parse_request = KunitParseRequest(cli_args.raw_output, - exec_result.result, cli_args.build_dir, cli_args.json) - result = parse_tests(parse_request) + result = exec_tests(linux, exec_request, parse_request) kunit_parser.print_with_timestamp(( - 'Elapsed time: %.3fs\n') % ( - exec_result.elapsed_time)) + 'Elapsed time: %.3fs\n') % (result.elapsed_time)) if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'parse': if cli_args.file == None: + sys.stdin.reconfigure(errors='backslashreplace') # pytype: disable=attribute-error kunit_output = sys.stdin else: - with open(cli_args.file, 'r') as f: + with open(cli_args.file, 'r', errors='backslashreplace') as f: kunit_output = f.read().splitlines() request = KunitParseRequest(cli_args.raw_output, - kunit_output, None, cli_args.json) - result = parse_tests(request) + result = parse_tests(request, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) else: diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index f5cca5c38cac..746bec72b9ac 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -11,47 +11,47 @@ import os import kunit_parser -from kunit_parser import TestStatus - -def get_json_result(test_result, def_config, build_dir, json_path) -> str: - sub_groups = [] - - # Each test suite is mapped to a KernelCI sub_group - for test_suite in test_result.suites: - sub_group = { - "name": test_suite.name, - "arch": "UM", - "defconfig": def_config, - "build_environment": build_dir, - "test_cases": [], - "lab_name": None, - "kernel": None, - "job": None, - "git_branch": "kselftest", - } - test_cases = [] - # TODO: Add attachments attribute in test_case with detailed - # failure message, see https://api.kernelci.org/schema-test-case.html#get - for case in test_suite.cases: - test_case = {"name": case.name, "status": "FAIL"} - if case.status == TestStatus.SUCCESS: +from kunit_parser import Test, TestResult, TestStatus +from typing import Any, Dict, Optional + +JsonObj = Dict[str, Any] + +def _get_group_json(test: Test, def_config: str, + build_dir: Optional[str]) -> JsonObj: + sub_groups = [] # List[JsonObj] + test_cases = [] # List[JsonObj] + + for subtest in test.subtests: + if len(subtest.subtests): + sub_group = _get_group_json(subtest, def_config, + build_dir) + sub_groups.append(sub_group) + else: + test_case = {"name": subtest.name, "status": "FAIL"} + if subtest.status == TestStatus.SUCCESS: test_case["status"] = "PASS" - elif case.status == TestStatus.TEST_CRASHED: + elif subtest.status == TestStatus.TEST_CRASHED: test_case["status"] = "ERROR" test_cases.append(test_case) - sub_group["test_cases"] = test_cases - sub_groups.append(sub_group) + test_group = { - "name": "KUnit Test Group", + "name": test.name, "arch": "UM", "defconfig": def_config, "build_environment": build_dir, "sub_groups": sub_groups, + "test_cases": test_cases, "lab_name": None, "kernel": None, "job": None, "git_branch": "kselftest", } + return test_group + +def get_json_result(test_result: TestResult, def_config: str, + build_dir: Optional[str], json_path: str) -> str: + test_group = _get_group_json(test_result.test, def_config, build_dir) + test_group["name"] = "KUnit Test Group" json_obj = json.dumps(test_group, indent=4) if json_path != 'stdout': with open(json_path, 'w') as result_path: diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 2c6f916ccbaf..66095568bf32 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -12,11 +12,8 @@ import subprocess import os import shutil import signal -from typing import Iterator, Optional, Tuple - -from contextlib import ExitStack - -from collections import namedtuple +import threading +from typing import Iterator, List, Optional, Tuple import kunit_config import kunit_parser @@ -103,8 +100,8 @@ class LinuxSourceTreeOperations(object): if stderr: # likely only due to build warnings print(stderr.decode()) - def run(self, params, timeout, build_dir, outfile) -> None: - pass + def start(self, params: List[str], build_dir: str) -> subprocess.Popen: + raise RuntimeError('not implemented!') class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): @@ -123,7 +120,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): kconfig.parse_from_string(self._kconfig) base_kunitconfig.merge_in_entries(kconfig) - def run(self, params, timeout, build_dir, outfile): + def start(self, params: List[str], build_dir: str) -> subprocess.Popen: kernel_path = os.path.join(build_dir, self._kernel_path) qemu_command = ['qemu-system-' + self._qemu_arch, '-nodefaults', @@ -134,18 +131,11 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): '-nographic', '-serial stdio'] + self._extra_qemu_params print('Running tests with:\n$', ' '.join(qemu_command)) - with open(outfile, 'w') as output: - process = subprocess.Popen(' '.join(qemu_command), - stdin=subprocess.PIPE, - stdout=output, - stderr=subprocess.STDOUT, - text=True, shell=True) - try: - process.wait(timeout=timeout) - except Exception as e: - print(e) - process.terminate() - return process + return subprocess.Popen(' '.join(qemu_command), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, shell=True, errors='backslashreplace') class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): """An abstraction over command line operations performed on a source tree.""" @@ -168,24 +158,21 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): process.wait() kunit_parser.print_with_timestamp( 'Disabling broken configs to run KUnit tests...') - with ExitStack() as es: - config = open(get_kconfig_path(build_dir), 'a') - disable = open(BROKEN_ALLCONFIG_PATH, 'r').read() - config.write(disable) + + with open(get_kconfig_path(build_dir), 'a') as config: + with open(BROKEN_ALLCONFIG_PATH, 'r') as disable: + config.write(disable.read()) kunit_parser.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') - def run(self, params, timeout, build_dir, outfile): + def start(self, params: List[str], build_dir: str) -> subprocess.Popen: """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = get_file_path(build_dir, 'linux') - outfile = get_outfile_path(build_dir) - with open(outfile, 'w') as output: - process = subprocess.Popen([linux_bin] + params, - stdin=subprocess.PIPE, - stdout=output, - stderr=subprocess.STDOUT, - text=True) - process.wait(timeout) + return subprocess.Popen([linux_bin] + params, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, errors='backslashreplace') def get_kconfig_path(build_dir) -> str: return get_file_path(build_dir, KCONFIG_PATH) @@ -202,8 +189,9 @@ def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceT return LinuxSourceTreeOperationsUml(cross_compile=cross_compile) elif os.path.isfile(config_path): return get_source_tree_ops_from_qemu_config(config_path, cross_compile)[1] - else: - raise ConfigError(arch + ' is not a valid arch') + + options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] + raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options))) def get_source_tree_ops_from_qemu_config(config_path: str, cross_compile: Optional[str]) -> Tuple[ @@ -219,12 +207,15 @@ def get_source_tree_ops_from_qemu_config(config_path: str, module_path = '.' + os.path.join(os.path.basename(QEMU_CONFIGS_DIR), os.path.basename(config_path)) spec = importlib.util.spec_from_file_location(module_path, config_path) config = importlib.util.module_from_spec(spec) - # TODO(brendanhiggins@google.com): I looked this up and apparently other - # Python projects have noted that pytype complains that "No attribute - # 'exec_module' on _importlib_modulespec._Loader". Disabling for now. - spec.loader.exec_module(config) # pytype: disable=attribute-error - return config.QEMU_ARCH.linux_arch, LinuxSourceTreeOperationsQemu( - config.QEMU_ARCH, cross_compile=cross_compile) + # See https://github.com/python/typeshed/pull/2626 for context. + assert isinstance(spec.loader, importlib.abc.Loader) + spec.loader.exec_module(config) + + if not hasattr(config, 'QEMU_ARCH'): + raise ValueError('qemu_config module missing "QEMU_ARCH": ' + config_path) + params: qemu_config.QemuArchParams = config.QEMU_ARCH # type: ignore + return params.linux_arch, LinuxSourceTreeOperationsQemu( + params, cross_compile=cross_compile) class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" @@ -330,12 +321,36 @@ class LinuxSourceTree(object): args.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt']) if filter_glob: args.append('kunit.filter_glob='+filter_glob) - outfile = get_outfile_path(build_dir) - self._ops.run(args, timeout, build_dir, outfile) - subprocess.call(['stty', 'sane']) - with open(outfile, 'r') as file: - for line in file: + + process = self._ops.start(args, build_dir) + assert process.stdout is not None # tell mypy it's set + + # Enforce the timeout in a background thread. + def _wait_proc(): + try: + process.wait(timeout=timeout) + except Exception as e: + print(e) + process.terminate() + process.wait() + waiter = threading.Thread(target=_wait_proc) + waiter.start() + + output = open(get_outfile_path(build_dir), 'w') + try: + # Tee the output to the file and to our caller in real time. + for line in process.stdout: + output.write(line) yield line + # This runs even if our caller doesn't consume every line. + finally: + # Flush any leftover output to the file + output.write(process.stdout.read()) + output.close() + process.stdout.close() + + waiter.join() + subprocess.call(['stty', 'sane']) def signal_handler(self, sig, frame) -> None: logging.error('Build interruption occurred. Cleaning console.') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 6310a641b151..3355196d0515 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -1,11 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 # -# Parses test results from a kernel dmesg log. +# Parses KTAP test results from a kernel dmesg log and incrementally prints +# results with reader-friendly format. Stores and returns test results in a +# Test object. # # Copyright (C) 2019, Google LLC. # Author: Felix Guo <felixguoxiuping@gmail.com> # Author: Brendan Higgins <brendanhiggins@google.com> +# Author: Rae Moar <rmoar@google.com> +from __future__ import annotations import re from collections import namedtuple @@ -14,33 +18,52 @@ from enum import Enum, auto from functools import reduce from typing import Iterable, Iterator, List, Optional, Tuple -TestResult = namedtuple('TestResult', ['status','suites','log']) - -class TestSuite(object): +TestResult = namedtuple('TestResult', ['status','test','log']) + +class Test(object): + """ + A class to represent a test parsed from KTAP results. All KTAP + results within a test log are stored in a main Test object as + subtests. + + Attributes: + status : TestStatus - status of the test + name : str - name of the test + expected_count : int - expected number of subtests (0 if single + test case and None if unknown expected number of subtests) + subtests : List[Test] - list of subtests + log : List[str] - log of KTAP lines that correspond to the test + counts : TestCounts - counts of the test statuses and errors of + subtests or of the test itself if the test is a single + test case. + """ def __init__(self) -> None: - self.status = TestStatus.SUCCESS - self.name = '' - self.cases = [] # type: List[TestCase] - - def __str__(self) -> str: - return 'TestSuite(' + str(self.status) + ',' + self.name + ',' + str(self.cases) + ')' - - def __repr__(self) -> str: - return str(self) - -class TestCase(object): - def __init__(self) -> None: - self.status = TestStatus.SUCCESS + """Creates Test object with default attributes.""" + self.status = TestStatus.TEST_CRASHED self.name = '' + self.expected_count = 0 # type: Optional[int] + self.subtests = [] # type: List[Test] self.log = [] # type: List[str] + self.counts = TestCounts() def __str__(self) -> str: - return 'TestCase(' + str(self.status) + ',' + self.name + ',' + str(self.log) + ')' + """Returns string representation of a Test class object.""" + return ('Test(' + str(self.status) + ', ' + self.name + + ', ' + str(self.expected_count) + ', ' + + str(self.subtests) + ', ' + str(self.log) + ', ' + + str(self.counts) + ')') def __repr__(self) -> str: + """Returns string representation of a Test class object.""" return str(self) + def add_error(self, error_message: str) -> None: + """Records an error that occurred while parsing this test.""" + self.counts.errors += 1 + print_error('Test ' + self.name + ': ' + error_message) + class TestStatus(Enum): + """An enumeration class to represent the status of a test.""" SUCCESS = auto() FAILURE = auto() SKIPPED = auto() @@ -48,381 +71,747 @@ class TestStatus(Enum): NO_TESTS = auto() FAILURE_TO_PARSE_TESTS = auto() +class TestCounts: + """ + Tracks the counts of statuses of all test cases and any errors within + a Test. + + Attributes: + passed : int - the number of tests that have passed + failed : int - the number of tests that have failed + crashed : int - the number of tests that have crashed + skipped : int - the number of tests that have skipped + errors : int - the number of errors in the test and subtests + """ + def __init__(self): + """Creates TestCounts object with counts of all test + statuses and test errors set to 0. + """ + self.passed = 0 + self.failed = 0 + self.crashed = 0 + self.skipped = 0 + self.errors = 0 + + def __str__(self) -> str: + """Returns the string representation of a TestCounts object. + """ + return ('Passed: ' + str(self.passed) + + ', Failed: ' + str(self.failed) + + ', Crashed: ' + str(self.crashed) + + ', Skipped: ' + str(self.skipped) + + ', Errors: ' + str(self.errors)) + + def total(self) -> int: + """Returns the total number of test cases within a test + object, where a test case is a test with no subtests. + """ + return (self.passed + self.failed + self.crashed + + self.skipped) + + def add_subtest_counts(self, counts: TestCounts) -> None: + """ + Adds the counts of another TestCounts object to the current + TestCounts object. Used to add the counts of a subtest to the + parent test. + + Parameters: + counts - a different TestCounts object whose counts + will be added to the counts of the TestCounts object + """ + self.passed += counts.passed + self.failed += counts.failed + self.crashed += counts.crashed + self.skipped += counts.skipped + self.errors += counts.errors + + def get_status(self) -> TestStatus: + """Returns the aggregated status of a Test using test + counts. + """ + if self.total() == 0: + return TestStatus.NO_TESTS + elif self.crashed: + # If one of the subtests crash, the expected status + # of the Test is crashed. + return TestStatus.TEST_CRASHED + elif self.failed: + # Otherwise if one of the subtests fail, the + # expected status of the Test is failed. + return TestStatus.FAILURE + elif self.passed: + # Otherwise if one of the subtests pass, the + # expected status of the Test is passed. + return TestStatus.SUCCESS + else: + # Finally, if none of the subtests have failed, + # crashed, or passed, the expected status of the + # Test is skipped. + return TestStatus.SKIPPED + + def add_status(self, status: TestStatus) -> None: + """ + Increments count of inputted status. + + Parameters: + status - status to be added to the TestCounts object + """ + if status == TestStatus.SUCCESS: + self.passed += 1 + elif status == TestStatus.FAILURE: + self.failed += 1 + elif status == TestStatus.SKIPPED: + self.skipped += 1 + elif status != TestStatus.NO_TESTS: + self.crashed += 1 + class LineStream: - """Provides a peek()/pop() interface over an iterator of (line#, text).""" + """ + A class to represent the lines of kernel output. + Provides a peek()/pop() interface over an iterator of + (line#, text). + """ _lines: Iterator[Tuple[int, str]] _next: Tuple[int, str] _done: bool def __init__(self, lines: Iterator[Tuple[int, str]]): + """Creates a new LineStream that wraps the given iterator.""" self._lines = lines self._done = False self._next = (0, '') self._get_next() def _get_next(self) -> None: + """Advances the LineSteam to the next line.""" try: self._next = next(self._lines) except StopIteration: self._done = True def peek(self) -> str: + """Returns the current line, without advancing the LineStream. + """ return self._next[1] def pop(self) -> str: + """Returns the current line and advances the LineStream to + the next line. + """ n = self._next self._get_next() return n[1] def __bool__(self) -> bool: + """Returns True if stream has more lines.""" return not self._done # Only used by kunit_tool_test.py. def __iter__(self) -> Iterator[str]: + """Empties all lines stored in LineStream object into + Iterator object and returns the Iterator object. + """ while bool(self): yield self.pop() def line_number(self) -> int: + """Returns the line number of the current line.""" return self._next[0] -kunit_start_re = re.compile(r'TAP version [0-9]+$') -kunit_end_re = re.compile('(List of all partitions:|' - 'Kernel panic - not syncing: VFS:|reboot: System halted)') +# Parsing helper methods: + +KTAP_START = re.compile(r'KTAP version ([0-9]+)$') +TAP_START = re.compile(r'TAP version ([0-9]+)$') +KTAP_END = re.compile('(List of all partitions:|' + 'Kernel panic - not syncing: VFS:|reboot: System halted)') def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream: - def isolate_kunit_output(kernel_output: Iterable[str]) -> Iterator[Tuple[int, str]]: + """Extracts KTAP lines from the kernel output.""" + def isolate_ktap_output(kernel_output: Iterable[str]) \ + -> Iterator[Tuple[int, str]]: line_num = 0 started = False for line in kernel_output: line_num += 1 - line = line.rstrip() # line always has a trailing \n - if kunit_start_re.search(line): + line = line.rstrip() # remove trailing \n + if not started and KTAP_START.search(line): + # start extracting KTAP lines and set prefix + # to number of characters before version line + prefix_len = len( + line.split('KTAP version')[0]) + started = True + yield line_num, line[prefix_len:] + elif not started and TAP_START.search(line): + # start extracting KTAP lines and set prefix + # to number of characters before version line prefix_len = len(line.split('TAP version')[0]) started = True yield line_num, line[prefix_len:] - elif kunit_end_re.search(line): + elif started and KTAP_END.search(line): + # stop extracting KTAP lines break elif started: - yield line_num, line[prefix_len:] - return LineStream(lines=isolate_kunit_output(kernel_output)) - -DIVIDER = '=' * 60 - -RESET = '\033[0;0m' - -def red(text) -> str: - return '\033[1;31m' + text + RESET - -def yellow(text) -> str: - return '\033[1;33m' + text + RESET - -def green(text) -> str: - return '\033[1;32m' + text + RESET - -def print_with_timestamp(message) -> None: - print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) - -def format_suite_divider(message) -> str: - return '======== ' + message + ' ========' + # remove prefix and any indention and yield + # line with line number + line = line[prefix_len:].lstrip() + yield line_num, line + return LineStream(lines=isolate_ktap_output(kernel_output)) + +KTAP_VERSIONS = [1] +TAP_VERSIONS = [13, 14] + +def check_version(version_num: int, accepted_versions: List[int], + version_type: str, test: Test) -> None: + """ + Adds error to test object if version number is too high or too + low. + + Parameters: + version_num - The inputted version number from the parsed KTAP or TAP + header line + accepted_version - List of accepted KTAP or TAP versions + version_type - 'KTAP' or 'TAP' depending on the type of + version line. + test - Test object for current test being parsed + """ + if version_num < min(accepted_versions): + test.add_error(version_type + + ' version lower than expected!') + elif version_num > max(accepted_versions): + test.add_error( + version_type + ' version higher than expected!') + +def parse_ktap_header(lines: LineStream, test: Test) -> bool: + """ + Parses KTAP/TAP header line and checks version number. + Returns False if fails to parse KTAP/TAP header line. + + Accepted formats: + - 'KTAP version [version number]' + - 'TAP version [version number]' + + Parameters: + lines - LineStream of KTAP output to parse + test - Test object for current test being parsed + + Return: + True if successfully parsed KTAP/TAP header line + """ + ktap_match = KTAP_START.match(lines.peek()) + tap_match = TAP_START.match(lines.peek()) + if ktap_match: + version_num = int(ktap_match.group(1)) + check_version(version_num, KTAP_VERSIONS, 'KTAP', test) + elif tap_match: + version_num = int(tap_match.group(1)) + check_version(version_num, TAP_VERSIONS, 'TAP', test) + else: + return False + test.log.append(lines.pop()) + return True -def print_suite_divider(message) -> None: - print_with_timestamp(DIVIDER) - print_with_timestamp(format_suite_divider(message)) +TEST_HEADER = re.compile(r'^# Subtest: (.*)$') -def print_log(log) -> None: - for m in log: - print_with_timestamp(m) +def parse_test_header(lines: LineStream, test: Test) -> bool: + """ + Parses test header and stores test name in test object. + Returns False if fails to parse test header line. -TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*# (Subtest:|.*: kunit test case crashed!)).*$') + Accepted format: + - '# Subtest: [test name]' -def consume_non_diagnostic(lines: LineStream) -> None: - while lines and not TAP_ENTRIES.match(lines.peek()): - lines.pop() + Parameters: + lines - LineStream of KTAP output to parse + test - Test object for current test being parsed -def save_non_diagnostic(lines: LineStream, test_case: TestCase) -> None: - while lines and not TAP_ENTRIES.match(lines.peek()): - test_case.log.append(lines.peek()) - lines.pop() + Return: + True if successfully parsed test header line + """ + match = TEST_HEADER.match(lines.peek()) + if not match: + return False + test.log.append(lines.pop()) + test.name = match.group(1) + return True -OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text']) +TEST_PLAN = re.compile(r'1\.\.([0-9]+)') -OK_NOT_OK_SKIP = re.compile(r'^[\s]*(ok|not ok) [0-9]+ - (.*) # SKIP(.*)$') +def parse_test_plan(lines: LineStream, test: Test) -> bool: + """ + Parses test plan line and stores the expected number of subtests in + test object. Reports an error if expected count is 0. + Returns False and reports missing test plan error if fails to parse + test plan. -OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') + Accepted format: + - '1..[number of subtests]' -OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') + Parameters: + lines - LineStream of KTAP output to parse + test - Test object for current test being parsed -def parse_ok_not_ok_test_case(lines: LineStream, test_case: TestCase) -> bool: - save_non_diagnostic(lines, test_case) - if not lines: - test_case.status = TestStatus.TEST_CRASHED - return True - line = lines.peek() - match = OK_NOT_OK_SUBTEST.match(line) - while not match and lines: - line = lines.pop() - match = OK_NOT_OK_SUBTEST.match(line) - if match: - test_case.log.append(lines.pop()) - test_case.name = match.group(2) - skip_match = OK_NOT_OK_SKIP.match(line) - if skip_match: - test_case.status = TestStatus.SKIPPED - return True - if test_case.status == TestStatus.TEST_CRASHED: - return True - if match.group(1) == 'ok': - test_case.status = TestStatus.SUCCESS - else: - test_case.status = TestStatus.FAILURE - return True - else: + Return: + True if successfully parsed test plan line + """ + match = TEST_PLAN.match(lines.peek()) + if not match: + test.expected_count = None + test.add_error('missing plan line!') return False - -SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$') -DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$') - -def parse_diagnostic(lines: LineStream, test_case: TestCase) -> bool: - save_non_diagnostic(lines, test_case) - if not lines: + test.log.append(lines.pop()) + expected_count = int(match.group(1)) + test.expected_count = expected_count + if expected_count == 0: + test.status = TestStatus.NO_TESTS + test.add_error('0 tests run!') + return True + +TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') + +TEST_RESULT_SKIP = re.compile(r'^(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') + +def peek_test_name_match(lines: LineStream, test: Test) -> bool: + """ + Matches current line with the format of a test result line and checks + if the name matches the name of the current test. + Returns False if fails to match format or name. + + Accepted format: + - '[ok|not ok] [test number] [-] [test name] [optional skip + directive]' + + Parameters: + lines - LineStream of KTAP output to parse + test - Test object for current test being parsed + + Return: + True if matched a test result line and the name matching the + expected test name + """ + line = lines.peek() + match = TEST_RESULT.match(line) + if not match: return False + name = match.group(4) + return (name == test.name) + +def parse_test_result(lines: LineStream, test: Test, + expected_num: int) -> bool: + """ + Parses test result line and stores the status and name in the test + object. Reports an error if the test number does not match expected + test number. + Returns False if fails to parse test result line. + + Note that the SKIP directive is the only direction that causes a + change in status. + + Accepted format: + - '[ok|not ok] [test number] [-] [test name] [optional skip + directive]' + + Parameters: + lines - LineStream of KTAP output to parse + test - Test object for current test being parsed + expected_num - expected test number for current test + + Return: + True if successfully parsed a test result line. + """ line = lines.peek() - match = SUBTEST_DIAGNOSTIC.match(line) - if match: - test_case.log.append(lines.pop()) - crash_match = DIAGNOSTIC_CRASH_MESSAGE.match(line) - if crash_match: - test_case.status = TestStatus.TEST_CRASHED - return True - else: + match = TEST_RESULT.match(line) + skip_match = TEST_RESULT_SKIP.match(line) + + # Check if line matches test result line format + if not match: return False + test.log.append(lines.pop()) -def parse_test_case(lines: LineStream) -> Optional[TestCase]: - test_case = TestCase() - save_non_diagnostic(lines, test_case) - while parse_diagnostic(lines, test_case): - pass - if parse_ok_not_ok_test_case(lines, test_case): - return test_case + # Set name of test object + if skip_match: + test.name = skip_match.group(4) else: - return None - -SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') - -def parse_subtest_header(lines: LineStream) -> Optional[str]: - consume_non_diagnostic(lines) - if not lines: - return None - match = SUBTEST_HEADER.match(lines.peek()) - if match: - lines.pop() - return match.group(1) + test.name = match.group(4) + + # Check test num + num = int(match.group(2)) + if num != expected_num: + test.add_error('Expected test number ' + + str(expected_num) + ' but found ' + str(num)) + + # Set status of test object + status = match.group(1) + if skip_match: + test.status = TestStatus.SKIPPED + elif status == 'ok': + test.status = TestStatus.SUCCESS else: - return None + test.status = TestStatus.FAILURE + return True + +def parse_diagnostic(lines: LineStream) -> List[str]: + """ + Parse lines that do not match the format of a test result line or + test header line and returns them in list. + + Line formats that are not parsed: + - '# Subtest: [test name]' + - '[ok|not ok] [test number] [-] [test name] [optional skip + directive]' + + Parameters: + lines - LineStream of KTAP output to parse + + Return: + Log of diagnostic lines + """ + log = [] # type: List[str] + while lines and not TEST_RESULT.match(lines.peek()) and not \ + TEST_HEADER.match(lines.peek()): + log.append(lines.pop()) + return log + +DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^# .*?: kunit test case crashed!$') + +def parse_crash_in_log(test: Test) -> bool: + """ + Iterate through the lines of the log to parse for crash message. + If crash message found, set status to crashed and return True. + Otherwise return False. + + Parameters: + test - Test object for current test being parsed + + Return: + True if crash message found in log + """ + for line in test.log: + if DIAGNOSTIC_CRASH_MESSAGE.match(line): + test.status = TestStatus.TEST_CRASHED + return True + return False -SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') -def parse_subtest_plan(lines: LineStream) -> Optional[int]: - consume_non_diagnostic(lines) - match = SUBTEST_PLAN.match(lines.peek()) - if match: - lines.pop() - return int(match.group(1)) - else: - return None - -def max_status(left: TestStatus, right: TestStatus) -> TestStatus: - if left == right: - return left - elif left == TestStatus.TEST_CRASHED or right == TestStatus.TEST_CRASHED: - return TestStatus.TEST_CRASHED - elif left == TestStatus.FAILURE or right == TestStatus.FAILURE: - return TestStatus.FAILURE - elif left == TestStatus.SKIPPED: - return right - else: - return left +# Printing helper methods: -def parse_ok_not_ok_test_suite(lines: LineStream, - test_suite: TestSuite, - expected_suite_index: int) -> bool: - consume_non_diagnostic(lines) - if not lines: - test_suite.status = TestStatus.TEST_CRASHED - return False - line = lines.peek() - match = OK_NOT_OK_MODULE.match(line) - if match: - lines.pop() - if match.group(1) == 'ok': - test_suite.status = TestStatus.SUCCESS - else: - test_suite.status = TestStatus.FAILURE - skip_match = OK_NOT_OK_SKIP.match(line) - if skip_match: - test_suite.status = TestStatus.SKIPPED - suite_index = int(match.group(2)) - if suite_index != expected_suite_index: - print_with_timestamp( - red('[ERROR] ') + 'expected_suite_index ' + - str(expected_suite_index) + ', but got ' + - str(suite_index)) - return True - else: - return False +DIVIDER = '=' * 60 -def bubble_up_errors(status_list: Iterable[TestStatus]) -> TestStatus: - return reduce(max_status, status_list, TestStatus.SKIPPED) +RESET = '\033[0;0m' -def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: - max_test_case_status = bubble_up_errors(x.status for x in test_suite.cases) - return max_status(max_test_case_status, test_suite.status) +def red(text: str) -> str: + """Returns inputted string with red color code.""" + return '\033[1;31m' + text + RESET -def parse_test_suite(lines: LineStream, expected_suite_index: int) -> Optional[TestSuite]: - if not lines: - return None - consume_non_diagnostic(lines) - test_suite = TestSuite() - test_suite.status = TestStatus.SUCCESS - name = parse_subtest_header(lines) - if not name: - return None - test_suite.name = name - expected_test_case_num = parse_subtest_plan(lines) - if expected_test_case_num is None: - return None - while expected_test_case_num > 0: - test_case = parse_test_case(lines) - if not test_case: - break - test_suite.cases.append(test_case) - expected_test_case_num -= 1 - if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index): - test_suite.status = bubble_up_test_case_errors(test_suite) - return test_suite - elif not lines: - print_with_timestamp(red('[ERROR] ') + 'ran out of lines before end token') - return test_suite - else: - print(f'failed to parse end of suite "{name}", at line {lines.line_number()}: {lines.peek()}') - return None +def yellow(text: str) -> str: + """Returns inputted string with yellow color code.""" + return '\033[1;33m' + text + RESET -TAP_HEADER = re.compile(r'^TAP version 14$') +def green(text: str) -> str: + """Returns inputted string with green color code.""" + return '\033[1;32m' + text + RESET -def parse_tap_header(lines: LineStream) -> bool: - consume_non_diagnostic(lines) - if TAP_HEADER.match(lines.peek()): - lines.pop() - return True - else: - return False +ANSI_LEN = len(red('')) -TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') +def print_with_timestamp(message: str) -> None: + """Prints message with timestamp at beginning.""" + print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) -def parse_test_plan(lines: LineStream) -> Optional[int]: - consume_non_diagnostic(lines) - match = TEST_PLAN.match(lines.peek()) - if match: - lines.pop() - return int(match.group(1)) - else: - return None - -def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: - return bubble_up_errors(x.status for x in test_suites) - -def parse_test_result(lines: LineStream) -> TestResult: - consume_non_diagnostic(lines) - if not lines or not parse_tap_header(lines): - return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) - expected_test_suite_num = parse_test_plan(lines) - if expected_test_suite_num == 0: - return TestResult(TestStatus.NO_TESTS, [], lines) - elif expected_test_suite_num is None: - return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) - test_suites = [] - for i in range(1, expected_test_suite_num + 1): - test_suite = parse_test_suite(lines, i) - if test_suite: - test_suites.append(test_suite) +def format_test_divider(message: str, len_message: int) -> str: + """ + Returns string with message centered in fixed width divider. + + Example: + '===================== message example =====================' + + Parameters: + message - message to be centered in divider line + len_message - length of the message to be printed such that + any characters of the color codes are not counted + + Return: + String containing message centered in fixed width divider + """ + default_count = 3 # default number of dashes + len_1 = default_count + len_2 = default_count + difference = len(DIVIDER) - len_message - 2 # 2 spaces added + if difference > 0: + # calculate number of dashes for each side of the divider + len_1 = int(difference / 2) + len_2 = difference - len_1 + return ('=' * len_1) + ' ' + message + ' ' + ('=' * len_2) + +def print_test_header(test: Test) -> None: + """ + Prints test header with test name and optionally the expected number + of subtests. + + Example: + '=================== example (2 subtests) ===================' + + Parameters: + test - Test object representing current test being printed + """ + message = test.name + if test.expected_count: + if test.expected_count == 1: + message += (' (' + str(test.expected_count) + + ' subtest)') else: - print_with_timestamp( - red('[ERROR] ') + ' expected ' + - str(expected_test_suite_num) + - ' test suites, but got ' + str(i - 2)) - break - test_suite = parse_test_suite(lines, -1) - if test_suite: - print_with_timestamp(red('[ERROR] ') + - 'got unexpected test suite: ' + test_suite.name) - if test_suites: - return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) - else: - return TestResult(TestStatus.NO_TESTS, [], lines) + message += (' (' + str(test.expected_count) + + ' subtests)') + print_with_timestamp(format_test_divider(message, len(message))) -class TestCounts: - passed: int - failed: int - crashed: int - skipped: int +def print_log(log: Iterable[str]) -> None: + """ + Prints all strings in saved log for test in yellow. - def __init__(self): - self.passed = 0 - self.failed = 0 - self.crashed = 0 - self.skipped = 0 - - def total(self) -> int: - return self.passed + self.failed + self.crashed + self.skipped - -def print_and_count_results(test_result: TestResult) -> TestCounts: - counts = TestCounts() - for test_suite in test_result.suites: - if test_suite.status == TestStatus.SUCCESS: - print_suite_divider(green('[PASSED] ') + test_suite.name) - elif test_suite.status == TestStatus.SKIPPED: - print_suite_divider(yellow('[SKIPPED] ') + test_suite.name) - elif test_suite.status == TestStatus.TEST_CRASHED: - print_suite_divider(red('[CRASHED] ' + test_suite.name)) - else: - print_suite_divider(red('[FAILED] ') + test_suite.name) - for test_case in test_suite.cases: - if test_case.status == TestStatus.SUCCESS: - counts.passed += 1 - print_with_timestamp(green('[PASSED] ') + test_case.name) - elif test_case.status == TestStatus.SKIPPED: - counts.skipped += 1 - print_with_timestamp(yellow('[SKIPPED] ') + test_case.name) - elif test_case.status == TestStatus.TEST_CRASHED: - counts.crashed += 1 - print_with_timestamp(red('[CRASHED] ' + test_case.name)) - print_log(map(yellow, test_case.log)) - print_with_timestamp('') + Parameters: + log - Iterable object with all strings saved in log for test + """ + for m in log: + print_with_timestamp(yellow(m)) + +def format_test_result(test: Test) -> str: + """ + Returns string with formatted test result with colored status and test + name. + + Example: + '[PASSED] example' + + Parameters: + test - Test object representing current test being printed + + Return: + String containing formatted test result + """ + if test.status == TestStatus.SUCCESS: + return (green('[PASSED] ') + test.name) + elif test.status == TestStatus.SKIPPED: + return (yellow('[SKIPPED] ') + test.name) + elif test.status == TestStatus.TEST_CRASHED: + print_log(test.log) + return (red('[CRASHED] ') + test.name) + else: + print_log(test.log) + return (red('[FAILED] ') + test.name) + +def print_test_result(test: Test) -> None: + """ + Prints result line with status of test. + + Example: + '[PASSED] example' + + Parameters: + test - Test object representing current test being printed + """ + print_with_timestamp(format_test_result(test)) + +def print_test_footer(test: Test) -> None: + """ + Prints test footer with status of test. + + Example: + '===================== [PASSED] example =====================' + + Parameters: + test - Test object representing current test being printed + """ + message = format_test_result(test) + print_with_timestamp(format_test_divider(message, + len(message) - ANSI_LEN)) + +def print_summary_line(test: Test) -> None: + """ + Prints summary line of test object. Color of line is dependent on + status of test. Color is green if test passes, yellow if test is + skipped, and red if the test fails or crashes. Summary line contains + counts of the statuses of the tests subtests or the test itself if it + has no subtests. + + Example: + "Testing complete. Passed: 2, Failed: 0, Crashed: 0, Skipped: 0, + Errors: 0" + + test - Test object representing current test being printed + """ + if test.status == TestStatus.SUCCESS: + color = green + elif test.status == TestStatus.SKIPPED or test.status == TestStatus.NO_TESTS: + color = yellow + else: + color = red + counts = test.counts + print_with_timestamp(color('Testing complete. ' + str(counts))) + +def print_error(error_message: str) -> None: + """ + Prints error message with error format. + + Example: + "[ERROR] Test example: missing test plan!" + + Parameters: + error_message - message describing error + """ + print_with_timestamp(red('[ERROR] ') + error_message) + +# Other methods: + +def bubble_up_test_results(test: Test) -> None: + """ + If the test has subtests, add the test counts of the subtests to the + test and check if any of the tests crashed and if so set the test + status to crashed. Otherwise if the test has no subtests add the + status of the test to the test counts. + + Parameters: + test - Test object for current test being parsed + """ + parse_crash_in_log(test) + subtests = test.subtests + counts = test.counts + status = test.status + for t in subtests: + counts.add_subtest_counts(t.counts) + if counts.total() == 0: + counts.add_status(status) + elif test.counts.get_status() == TestStatus.TEST_CRASHED: + test.status = TestStatus.TEST_CRASHED + +def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: + """ + Finds next test to parse in LineStream, creates new Test object, + parses any subtests of the test, populates Test object with all + information (status, name) about the test and the Test objects for + any subtests, and then returns the Test object. The method accepts + three formats of tests: + + Accepted test formats: + + - Main KTAP/TAP header + + Example: + + KTAP version 1 + 1..4 + [subtests] + + - Subtest header line + + Example: + + # Subtest: name + 1..3 + [subtests] + ok 1 name + + - Test result line + + Example: + + ok 1 - test + + Parameters: + lines - LineStream of KTAP output to parse + expected_num - expected test number for test to be parsed + log - list of strings containing any preceding diagnostic lines + corresponding to the current test + + Return: + Test object populated with characteristics and any subtests + """ + test = Test() + test.log.extend(log) + parent_test = False + main = parse_ktap_header(lines, test) + if main: + # If KTAP/TAP header is found, attempt to parse + # test plan + test.name = "main" + parse_test_plan(lines, test) + else: + # If KTAP/TAP header is not found, test must be subtest + # header or test result line so parse attempt to parser + # subtest header + parent_test = parse_test_header(lines, test) + if parent_test: + # If subtest header is found, attempt to parse + # test plan and print header + parse_test_plan(lines, test) + print_test_header(test) + expected_count = test.expected_count + subtests = [] + test_num = 1 + while expected_count is None or test_num <= expected_count: + # Loop to parse any subtests. + # Break after parsing expected number of tests or + # if expected number of tests is unknown break when test + # result line with matching name to subtest header is found + # or no more lines in stream. + sub_log = parse_diagnostic(lines) + sub_test = Test() + if not lines or (peek_test_name_match(lines, test) and + not main): + if expected_count and test_num <= expected_count: + # If parser reaches end of test before + # parsing expected number of subtests, print + # crashed subtest and record error + test.add_error('missing expected subtest!') + sub_test.log.extend(sub_log) + test.counts.add_status( + TestStatus.TEST_CRASHED) + print_test_result(sub_test) else: - counts.failed += 1 - print_with_timestamp(red('[FAILED] ') + test_case.name) - print_log(map(yellow, test_case.log)) - print_with_timestamp('') - return counts + test.log.extend(sub_log) + break + else: + sub_test = parse_test(lines, test_num, sub_log) + subtests.append(sub_test) + test_num += 1 + test.subtests = subtests + if not main: + # If not main test, look for test result line + test.log.extend(parse_diagnostic(lines)) + if (parent_test and peek_test_name_match(lines, test)) or \ + not parent_test: + parse_test_result(lines, test, expected_num) + else: + test.add_error('missing subtest result line!') + # Add statuses to TestCounts attribute in Test object + bubble_up_test_results(test) + if parent_test: + # If test has subtests and is not the main test object, print + # footer. + print_test_footer(test) + elif not main: + print_test_result(test) + return test def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: - counts = TestCounts() + """ + Using kernel output, extract KTAP lines, parse the lines for test + results and print condensed test results and summary line . + + Parameters: + kernel_output - Iterable object contains lines of kernel output + + Return: + TestResult - Tuple containg status of main test object, main test + object with all subtests, and log of all KTAP lines. + """ + print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) - test_result = parse_test_result(lines) - if test_result.status == TestStatus.NO_TESTS: - print(red('[ERROR] ') + yellow('no tests run!')) - elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS: - print(red('[ERROR] ') + yellow('could not parse test results!')) + test = Test() + if not lines: + test.add_error('invalid KTAP input!') + test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: - counts = print_and_count_results(test_result) + test = parse_test(lines, 0, []) + if test.status != TestStatus.NO_TESTS: + test.status = test.counts.get_status() print_with_timestamp(DIVIDER) - if test_result.status == TestStatus.SUCCESS: - fmt = green - elif test_result.status == TestStatus.SKIPPED: - fmt = yellow - else: - fmt =red - print_with_timestamp( - fmt('Testing complete. %d tests run. %d failed. %d crashed. %d skipped.' % - (counts.total(), counts.failed, counts.crashed, counts.skipped))) - return test_result + print_summary_line(test) + return TestResult(test.status, test, lines) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 619c4554cbff..9c4126731457 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -14,6 +14,7 @@ import tempfile, shutil # Handling test_tmpdir import itertools import json import signal +import subprocess import os import kunit_config @@ -106,10 +107,10 @@ class KUnitParserTest(unittest.TestCase): with open(log_path) as file: result = kunit_parser.extract_tap_lines(file.readlines()) self.assertContains('TAP version 14', result) - self.assertContains(' # Subtest: example', result) - self.assertContains(' 1..2', result) - self.assertContains(' ok 1 - example_simple_test', result) - self.assertContains(' ok 2 - example_mock_test', result) + self.assertContains('# Subtest: example', result) + self.assertContains('1..2', result) + self.assertContains('ok 1 - example_simple_test', result) + self.assertContains('ok 2 - example_mock_test', result) self.assertContains('ok 1 - example', result) def test_output_with_prefix_isolated_correctly(self): @@ -117,28 +118,28 @@ class KUnitParserTest(unittest.TestCase): with open(log_path) as file: result = kunit_parser.extract_tap_lines(file.readlines()) self.assertContains('TAP version 14', result) - self.assertContains(' # Subtest: kunit-resource-test', result) - self.assertContains(' 1..5', result) - self.assertContains(' ok 1 - kunit_resource_test_init_resources', result) - self.assertContains(' ok 2 - kunit_resource_test_alloc_resource', result) - self.assertContains(' ok 3 - kunit_resource_test_destroy_resource', result) - self.assertContains(' foo bar #', result) - self.assertContains(' ok 4 - kunit_resource_test_cleanup_resources', result) - self.assertContains(' ok 5 - kunit_resource_test_proper_free_ordering', result) + self.assertContains('# Subtest: kunit-resource-test', result) + self.assertContains('1..5', result) + self.assertContains('ok 1 - kunit_resource_test_init_resources', result) + self.assertContains('ok 2 - kunit_resource_test_alloc_resource', result) + self.assertContains('ok 3 - kunit_resource_test_destroy_resource', result) + self.assertContains('foo bar #', result) + self.assertContains('ok 4 - kunit_resource_test_cleanup_resources', result) + self.assertContains('ok 5 - kunit_resource_test_proper_free_ordering', result) self.assertContains('ok 1 - kunit-resource-test', result) - self.assertContains(' foo bar # non-kunit output', result) - self.assertContains(' # Subtest: kunit-try-catch-test', result) - self.assertContains(' 1..2', result) - self.assertContains(' ok 1 - kunit_test_try_catch_successful_try_no_catch', + self.assertContains('foo bar # non-kunit output', result) + self.assertContains('# Subtest: kunit-try-catch-test', result) + self.assertContains('1..2', result) + self.assertContains('ok 1 - kunit_test_try_catch_successful_try_no_catch', result) - self.assertContains(' ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch', + self.assertContains('ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch', result) self.assertContains('ok 2 - kunit-try-catch-test', result) - self.assertContains(' # Subtest: string-stream-test', result) - self.assertContains(' 1..3', result) - self.assertContains(' ok 1 - string_stream_test_empty_on_creation', result) - self.assertContains(' ok 2 - string_stream_test_not_empty_after_add', result) - self.assertContains(' ok 3 - string_stream_test_get_string', result) + self.assertContains('# Subtest: string-stream-test', result) + self.assertContains('1..3', result) + self.assertContains('ok 1 - string_stream_test_empty_on_creation', result) + self.assertContains('ok 2 - string_stream_test_not_empty_after_add', result) + self.assertContains('ok 3 - string_stream_test_get_string', result) self.assertContains('ok 3 - string-stream-test', result) def test_parse_successful_test_log(self): @@ -149,6 +150,22 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.TestStatus.SUCCESS, result.status) + def test_parse_successful_nested_tests_log(self): + all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') + with open(all_passed_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + + def test_kselftest_nested(self): + kselftest_log = test_data_path('test_is_test_passed-kselftest.log') + with open(kselftest_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: @@ -162,17 +179,29 @@ class KUnitParserTest(unittest.TestCase): with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.suites)) + self.assertEqual(0, len(result.test.subtests)) self.assertEqual( kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) + def test_missing_test_plan(self): + missing_plan_log = test_data_path('test_is_test_passed-' + 'missing_plan.log') + with open(missing_plan_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.extract_tap_lines( + file.readlines())) + self.assertEqual(2, result.test.counts.errors) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + def test_no_tests(self): - empty_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') - with open(empty_log) as file: + header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') + with open(header_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.suites)) + self.assertEqual(0, len(result.test.subtests)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) @@ -183,37 +212,53 @@ class KUnitParserTest(unittest.TestCase): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('could not parse test results!')) + print_mock.assert_any_call(StrContains('invalid KTAP input!')) print_mock.stop() - file.close() + self.assertEqual(0, len(result.test.subtests)) def test_crashed_test(self): crashed_log = test_data_path('test_is_test_passed-crash.log') with open(crashed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests( + file.readlines()) self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') - file = open(skipped_log) - result = kunit_parser.parse_run_tests(file.readlines()) + with open(skipped_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) # A skipped test does not fail the whole suite. self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - file.close() def test_skipped_all_tests(self): skipped_log = test_data_path('test_skip_all_tests.log') - file = open(skipped_log) - result = kunit_parser.parse_run_tests(file.readlines()) + with open(skipped_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual( kunit_parser.TestStatus.SKIPPED, result.status) + + def test_ignores_hyphen(self): + hyphen_log = test_data_path('test_strip_hyphen.log') + file = open(hyphen_log) + result = kunit_parser.parse_run_tests(file.readlines()) + + # A skipped test does not fail the whole suite. + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual( + "sysctl_test", + result.test.subtests[0].name) + self.assertEqual( + "example", + result.test.subtests[1].name) file.close() @@ -224,7 +269,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual('kunit-resource-test', result.test.subtests[0].name) def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') @@ -233,7 +278,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual('kunit-resource-test', result.test.subtests[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') @@ -242,7 +287,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual('kunit-resource-test', result.test.subtests[0].name) def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') @@ -251,7 +296,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual('kunit-resource-test', result.test.subtests[0].name) def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') @@ -260,7 +305,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual('kunit-resource-test', result.test.subtests[0].name) def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') @@ -269,7 +314,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual('kunit-resource-test', result.test.subtests[0].name) class LinuxSourceTreeTest(unittest.TestCase): @@ -283,13 +328,33 @@ class LinuxSourceTreeTest(unittest.TestCase): def test_valid_kunitconfig(self): with tempfile.NamedTemporaryFile('wt') as kunitconfig: - tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) def test_dir_kunitconfig(self): with tempfile.TemporaryDirectory('') as dir: - with open(os.path.join(dir, '.kunitconfig'), 'w') as f: + with open(os.path.join(dir, '.kunitconfig'), 'w'): pass - tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) + kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) + + def test_invalid_arch(self): + with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'): + kunit_kernel.LinuxSourceTree('', arch='invalid') + + def test_run_kernel_hits_exception(self): + def fake_start(unused_args, unused_build_dir): + return subprocess.Popen(['echo "hi\nbye"'], shell=True, text=True, stdout=subprocess.PIPE) + + with tempfile.TemporaryDirectory('') as build_dir: + tree = kunit_kernel.LinuxSourceTree(build_dir, load_config=False) + mock.patch.object(tree._ops, 'start', side_effect=fake_start).start() + + with self.assertRaises(ValueError): + for line in tree.run_kernel(build_dir=build_dir): + self.assertEqual(line, 'hi\n') + raise ValueError('uh oh, did not read all output') + + with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile: + self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output') # TODO: add more test cases. @@ -322,6 +387,12 @@ class KUnitJsonTest(unittest.TestCase): result = self._json_for('test_is_test_passed-no_tests_run_with_header.log') self.assertEqual(0, len(result['sub_groups'])) + def test_nested_json(self): + result = self._json_for('test_is_test_passed-all_passed_nested.log') + self.assertEqual( + {'name': 'example_simple_test', 'status': 'PASS'}, + result["sub_groups"][0]["sub_groups"][0]["test_cases"][0]) + class StrContains(str): def __eq__(self, other): return self in other @@ -380,7 +451,15 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(e.exception.code, 1) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) - self.print_mock.assert_any_call(StrContains(' 0 tests run')) + self.print_mock.assert_any_call(StrContains('invalid KTAP input!')) + + def test_exec_no_tests(self): + self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0']) + with self.assertRaises(SystemExit) as e: + kunit.main(['run'], self.linux_source_mock) + self.linux_source_mock.run_kernel.assert_called_once_with( + args=None, build_dir='.kunit', filter_glob='', timeout=300) + self.print_mock.assert_any_call(StrContains(' 0 tests run!')) def test_exec_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) @@ -388,7 +467,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) - self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run!'))) def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) @@ -397,7 +476,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) - self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) + self.assertNotEqual(call, mock.call(StrContains(' 0 tests run!'))) def test_run_raw_output_kunit(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) @@ -408,6 +487,14 @@ class KUnitMainTest(unittest.TestCase): self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) + def test_run_raw_output_does_not_take_positional_args(self): + # --raw_output is a string flag, but we don't want it to consume + # any positional arguments, only ones after an '=' + self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) + kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock) + self.linux_source_mock.run_kernel.assert_called_once_with( + args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300) + def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) @@ -477,6 +564,46 @@ class KUnitMainTest(unittest.TestCase): args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) + def test_list_tests(self): + want = ['suite.test1', 'suite.test2', 'suite2.test1'] + self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want + + got = kunit._list_tests(self.linux_source_mock, + kunit.KunitExecRequest(300, '.kunit', False, 'suite*', None, 'suite')) + + self.assertEqual(got, want) + # Should respect the user's filter glob when listing tests. + self.linux_source_mock.run_kernel.assert_called_once_with( + args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', timeout=300) + + + @mock.patch.object(kunit, '_list_tests') + def test_run_isolated_by_suite(self, mock_tests): + mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1'] + kunit.main(['exec', '--run_isolated=suite', 'suite*.test*'], self.linux_source_mock) + + # Should respect the user's filter glob when listing tests. + mock_tests.assert_called_once_with(mock.ANY, + kunit.KunitExecRequest(300, '.kunit', False, 'suite*.test*', None, 'suite')) + self.linux_source_mock.run_kernel.assert_has_calls([ + mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300), + ]) + + @mock.patch.object(kunit, '_list_tests') + def test_run_isolated_by_test(self, mock_tests): + mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1'] + kunit.main(['exec', '--run_isolated=test', 'suite*'], self.linux_source_mock) + + # Should respect the user's filter glob when listing tests. + mock_tests.assert_called_once_with(mock.ANY, + kunit.KunitExecRequest(300, '.kunit', False, 'suite*', None, 'test')) + self.linux_source_mock.run_kernel.assert_has_calls([ + mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', timeout=300), + ]) + if __name__ == '__main__': unittest.main() diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log new file mode 100644 index 000000000000..9d5b04fe43a6 --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log @@ -0,0 +1,34 @@ +TAP version 14 +1..2 + # Subtest: sysctl_test + 1..4 + # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed + ok 1 - sysctl_test_dointvec_null_tbl_data + # Subtest: example + 1..2 + init_suite + # example_simple_test: initializing + # example_simple_test: example_simple_test passed + ok 1 - example_simple_test + # example_mock_test: initializing + # example_mock_test: example_mock_test passed + ok 2 - example_mock_test + kunit example: all tests passed + ok 2 - example + # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed + ok 3 - sysctl_test_dointvec_table_len_is_zero + # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed + ok 4 - sysctl_test_dointvec_table_read_but_position_set +kunit sysctl_test: all tests passed +ok 1 - sysctl_test + # Subtest: example + 1..2 +init_suite + # example_simple_test: initializing + # example_simple_test: example_simple_test passed + ok 1 - example_simple_test + # example_mock_test: initializing + # example_mock_test: example_mock_test passed + ok 2 - example_mock_test +kunit example: all tests passed +ok 2 - example diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log new file mode 100644 index 000000000000..65d3f27feaf2 --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log @@ -0,0 +1,14 @@ +TAP version 13 +1..2 +# selftests: membarrier: membarrier_test_single_thread +# TAP version 13 +# 1..2 +# ok 1 sys_membarrier available +# ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected +ok 1 selftests: membarrier: membarrier_test_single_thread +# selftests: membarrier: membarrier_test_multi_thread +# TAP version 13 +# 1..2 +# ok 1 sys_membarrier available +# ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected +ok 2 selftests: membarrier: membarrier_test_multi_thread diff --git a/tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log new file mode 100644 index 000000000000..5cd17b7f818a --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log @@ -0,0 +1,31 @@ +KTAP version 1 + # Subtest: sysctl_test + # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed + ok 1 - sysctl_test_dointvec_null_tbl_data + # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed + ok 2 - sysctl_test_dointvec_table_maxlen_unset + # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed + ok 3 - sysctl_test_dointvec_table_len_is_zero + # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed + ok 4 - sysctl_test_dointvec_table_read_but_position_set + # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed + ok 5 - sysctl_test_dointvec_happy_single_positive + # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed + ok 6 - sysctl_test_dointvec_happy_single_negative + # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed + ok 7 - sysctl_test_dointvec_single_less_int_min + # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed + ok 8 - sysctl_test_dointvec_single_greater_int_max +kunit sysctl_test: all tests passed +ok 1 - sysctl_test + # Subtest: example + 1..2 +init_suite + # example_simple_test: initializing + # example_simple_test: example_simple_test passed + ok 1 - example_simple_test + # example_mock_test: initializing + # example_mock_test: example_mock_test passed + ok 2 - example_mock_test +kunit example: all tests passed +ok 2 - example diff --git a/tools/testing/kunit/test_data/test_strip_hyphen.log b/tools/testing/kunit/test_data/test_strip_hyphen.log new file mode 100644 index 000000000000..92ac7c24b374 --- /dev/null +++ b/tools/testing/kunit/test_data/test_strip_hyphen.log @@ -0,0 +1,16 @@ +KTAP version 1 +1..2 + # Subtest: sysctl_test + 1..1 + # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed + ok 1 - sysctl_test_dointvec_null_tbl_data +kunit sysctl_test: all tests passed +ok 1 - sysctl_test + # Subtest: example + 1..1 +init_suite + # example_simple_test: initializing + # example_simple_test: example_simple_test passed + ok 1 example_simple_test +kunit example: all tests passed +ok 2 example diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index f2abdd6ba12e..ba1488c7c315 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -9,12 +9,12 @@ TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \ all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) -fpsimd-test: fpsimd-test.o +fpsimd-test: fpsimd-test.o asm-utils.o $(CC) -nostdlib $^ -o $@ rdvl-sve: rdvl-sve.o rdvl.o -sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-ptrace: sve-ptrace.o sve-probe-vls: sve-probe-vls.o rdvl.o -sve-test: sve-test.o +sve-test: sve-test.o asm-utils.o $(CC) -nostdlib $^ -o $@ vec-syscfg: vec-syscfg.o rdvl.o vlset: vlset.o diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO index b6b7ebfcf362..44004e53da33 100644 --- a/tools/testing/selftests/arm64/fp/TODO +++ b/tools/testing/selftests/arm64/fp/TODO @@ -1,4 +1,7 @@ - Test unsupported values in the ABIs. -- More coverage for ptrace (eg, vector length conversions). -- Coverage for signals. -- Test PR_SVE_VL_INHERITY after a double fork. +- More coverage for ptrace: + - Get/set of FFR. + - Ensure ptraced processes actually see the register state visible through + the ptrace interface. + - Big endian. +- Test PR_SVE_VL_INHERIT after a double fork. diff --git a/tools/testing/selftests/arm64/fp/asm-utils.S b/tools/testing/selftests/arm64/fp/asm-utils.S new file mode 100644 index 000000000000..4b9728efc18d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-utils.S @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2021 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Utility functions for assembly code. + +#include <asm/unistd.h> +#include "assembler.h" + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction +.globl putdec + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction +.globl putdecn + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +.globl puthexb + +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction +.globl puthexnibble + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction +.globl dumphex + + // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction +.globl memcpy + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction +.globl memfill_ae + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction +.globl memclr + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction +.globl memfill diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h index 8944f2189206..90bd433d2665 100644 --- a/tools/testing/selftests/arm64/fp/assembler.h +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -54,4 +54,15 @@ endfunction .purgem \name\()_entry .endm +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + #endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 0dbd594c2747..e21e8ea52c7e 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -33,131 +33,6 @@ define_accessor setv, NVR, _vldr define_accessor getv, NVR, _vstr -// Print a single character x0 to stdout -// Clobbers x0-x2,x8 -function putc - str x0, [sp, #-16]! - - mov x0, #1 // STDOUT_FILENO - mov x1, sp - mov x2, #1 - mov x8, #__NR_write - svc #0 - - add sp, sp, #16 - ret -endfunction - -// Print a NUL-terminated string starting at address x0 to stdout -// Clobbers x0-x3,x8 -function puts - mov x1, x0 - - mov x2, #0 -0: ldrb w3, [x0], #1 - cbz w3, 1f - add x2, x2, #1 - b 0b - -1: mov w0, #1 // STDOUT_FILENO - mov x8, #__NR_write - svc #0 - - ret -endfunction - -// Utility macro to print a literal string -// Clobbers x0-x4,x8 -.macro puts string - .pushsection .rodata.str1.1, "aMS", 1 -.L__puts_literal\@: .string "\string" - .popsection - - ldr x0, =.L__puts_literal\@ - bl puts -.endm - -// Print an unsigned decimal number x0 to stdout -// Clobbers x0-x4,x8 -function putdec - mov x1, sp - str x30, [sp, #-32]! // Result can't be > 20 digits - - mov x2, #0 - strb w2, [x1, #-1]! // Write the NUL terminator - - mov x2, #10 -0: udiv x3, x0, x2 // div-mod loop to generate the digits - msub x0, x3, x2, x0 - add w0, w0, #'0' - strb w0, [x1, #-1]! - mov x0, x3 - cbnz x3, 0b - - ldrb w0, [x1] - cbnz w0, 1f - mov w0, #'0' // Print "0" for 0, not "" - strb w0, [x1, #-1]! - -1: mov x0, x1 - bl puts - - ldr x30, [sp], #32 - ret -endfunction - -// Print an unsigned decimal number x0 to stdout, followed by a newline -// Clobbers x0-x5,x8 -function putdecn - mov x5, x30 - - bl putdec - mov x0, #'\n' - bl putc - - ret x5 -endfunction - - -// Clobbers x0-x3,x8 -function puthexb - str x30, [sp, #-0x10]! - - mov w3, w0 - lsr w0, w0, #4 - bl puthexnibble - mov w0, w3 - - ldr x30, [sp], #0x10 - // fall through to puthexnibble -endfunction -// Clobbers x0-x2,x8 -function puthexnibble - and w0, w0, #0xf - cmp w0, #10 - blo 1f - add w0, w0, #'a' - ('9' + 1) -1: add w0, w0, #'0' - b putc -endfunction - -// x0=data in, x1=size in, clobbers x0-x5,x8 -function dumphex - str x30, [sp, #-0x10]! - - mov x4, x0 - mov x5, x1 - -0: subs x5, x5, #1 - b.lo 1f - ldrb w0, [x4], #1 - bl puthexb - b 0b - -1: ldr x30, [sp], #0x10 - ret -endfunction - // Declare some storate space to shadow the SVE register contents: .pushsection .text .data @@ -168,18 +43,6 @@ scratch: .space MAXVL_B .popsection -// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. -// Clobbers x0-x3 -function memcpy - cmp x2, #0 - b.eq 1f -0: ldrb w3, [x1], #1 - strb w3, [x0], #1 - subs x2, x2, #1 - b.ne 0b -1: ret -endfunction - // Generate a test pattern for storage in SVE registers // x0: pid (16 bits) // x1: register number (6 bits) @@ -227,33 +90,6 @@ function setup_vreg ret x4 endfunction -// Fill x1 bytes starting at x0 with 0xae (for canary purposes) -// Clobbers x1, x2. -function memfill_ae - mov w2, #0xae - b memfill -endfunction - -// Fill x1 bytes starting at x0 with 0. -// Clobbers x1, x2. -function memclr - mov w2, #0 -endfunction - // fall through to memfill - -// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 -// Clobbers x1 -function memfill - cmp x1, #0 - b.eq 1f - -0: strb w2, [x0], #1 - subs x1, x1, #1 - b.ne 0b - -1: ret -endfunction - // Trivial memory compare: compare x2 bytes starting at address x0 with // bytes starting at address x1. // Returns only if all bytes match; otherwise, the program is aborted. diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S deleted file mode 100644 index 3e81f9fab574..000000000000 --- a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// Copyright (C) 2015-2019 ARM Limited. -// Original author: Dave Martin <Dave.Martin@arm.com> -#include <asm/unistd.h> - -.arch_extension sve - -.globl sve_store_patterns - -sve_store_patterns: - mov x1, x0 - - index z0.b, #0, #1 - str q0, [x1] - - mov w8, #__NR_getpid - svc #0 - str q0, [x1, #0x10] - - mov z1.d, z0.d - str q0, [x1, #0x20] - - mov w8, #__NR_getpid - svc #0 - str q0, [x1, #0x30] - - mov z1.d, z0.d - str q0, [x1, #0x40] - - ret - -.size sve_store_patterns, . - sve_store_patterns -.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 612d3899614a..c4417bc48d4f 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2015-2020 ARM Limited. + * Copyright (C) 2015-2021 ARM Limited. * Original author: Dave Martin <Dave.Martin@arm.com> */ #include <errno.h> +#include <stdbool.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/auxv.h> +#include <sys/prctl.h> #include <sys/ptrace.h> #include <sys/types.h> #include <sys/uio.h> @@ -19,40 +21,22 @@ #include "../../kselftest.h" +#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) +#define FPSIMD_TESTS 5 + +#define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS) + /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 #endif -/* Number of registers filled in by sve_store_patterns */ -#define NR_VREGS 5 - -void sve_store_patterns(__uint128_t v[NR_VREGS]); - -static void dump(const void *buf, size_t size) -{ - size_t i; - const unsigned char *p = buf; - - for (i = 0; i < size; ++i) - printf(" %.2x", *p++); -} - -static int check_vregs(const __uint128_t vregs[NR_VREGS]) +static void fill_buf(char *buf, size_t size) { int i; - int ok = 1; - - for (i = 0; i < NR_VREGS; ++i) { - printf("# v[%d]:", i); - dump(&vregs[i], sizeof vregs[i]); - putchar('\n'); - - if (vregs[i] != vregs[0]) - ok = 0; - } - return ok; + for (i = 0; i < size; i++) + buf[i] = random(); } static int do_child(void) @@ -66,6 +50,15 @@ static int do_child(void) return EXIT_SUCCESS; } +static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) +{ + struct iovec iov; + + iov.iov_base = fpsimd; + iov.iov_len = sizeof(*fpsimd); + return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); +} + static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) { struct user_sve_header *sve; @@ -112,25 +105,335 @@ static int set_sve(pid_t pid, const struct user_sve_header *sve) return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); } -static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, - unsigned int vlmax) +/* Validate setting and getting the inherit flag */ +static void ptrace_set_get_inherit(pid_t child) +{ + struct user_sve_header sve; + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + int ret; + + /* First set the flag */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.vl = sve_vl_from_vq(SVE_VQ_MIN); + sve.flags = SVE_PT_VL_INHERIT; + ret = set_sve(child, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set SVE_PT_VL_INHERIT\n"); + return; + } + + /* + * Read back the new register state and verify that we have + * set the flags we expected. + */ + if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read SVE flags\n"); + return; + } + + ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT, + "SVE_PT_VL_INHERIT set\n"); + + /* Now clear */ + sve.flags &= ~SVE_PT_VL_INHERIT; + ret = set_sve(child, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to clear SVE_PT_VL_INHERIT\n"); + return; + } + + if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read SVE flags\n"); + return; + } + + ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT), + "SVE_PT_VL_INHERIT cleared\n"); + + free(new_sve); +} + +/* Validate attempting to set the specfied VL via ptrace */ +static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) +{ + struct user_sve_header sve; + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + int ret, prctl_vl; + + *supported = false; + + /* Check if the VL is supported in this process */ + prctl_vl = prctl(PR_SVE_SET_VL, vl); + if (prctl_vl == -1) + ksft_exit_fail_msg("prctl(PR_SVE_SET_VL) failed: %s (%d)\n", + strerror(errno), errno); + + /* If the VL is not supported then a supported VL will be returned */ + *supported = (prctl_vl == vl); + + /* Set the VL by doing a set with no register payload */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.vl = vl; + ret = set_sve(child, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u\n", vl); + return; + } + + /* + * Read back the new register state and verify that we have the + * same VL that we got from prctl() on ourselves. + */ + if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read VL %u\n", vl); + return; + } + + ksft_test_result(new_sve->vl = prctl_vl, "Set VL %u\n", vl); + + free(new_sve); +} + +static void check_u32(unsigned int vl, const char *reg, + uint32_t *in, uint32_t *out, int *errors) +{ + if (*in != *out) { + printf("# VL %d %s wrote %x read %x\n", + vl, reg, *in, *out); + (*errors)++; + } +} + +/* Access the FPSIMD registers via the SVE regset */ +static void ptrace_sve_fpsimd(pid_t child) { - unsigned int vq; - unsigned int i; + void *svebuf = NULL; + size_t svebufsz = 0; + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd, new_fpsimd; + unsigned int i, j; + unsigned char *p; + + /* New process should start with FPSIMD registers only */ + sve = get_sve(child, &svebuf, &svebufsz); + if (!sve) { + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + + return; + } else { + ksft_test_result_pass("get_sve(FPSIMD)\n"); + } - if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) - ksft_exit_fail_msg("Dumping non-SVE register\n"); + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "Set FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto out; - if (vlmax > sve->vl) - vlmax = sve->vl; + /* Try to set a known FPSIMD state via PT_REGS_SVE */ + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; - vq = sve_vq_from_vl(sve->vl); - for (i = 0; i < num; ++i) { - printf("# z%u:", i); - dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), - vlmax); - printf("%s\n", vlmax == sve->vl ? "" : " ..."); + for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j) + p[j] = j; } + + if (set_sve(child, sve)) { + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + + goto out; + } + + /* Verify via the FPSIMD regset */ + if (get_fpsimd(child, &new_fpsimd)) { + ksft_test_result_fail("get_fpsimd(): %s\n", + strerror(errno)); + goto out; + } + if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) + ksft_test_result_pass("get_fpsimd() gave same state\n"); + else + ksft_test_result_fail("get_fpsimd() gave different state\n"); + +out: + free(svebuf); +} + +/* Validate attempting to set SVE data and read SVE data */ +static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) +{ + void *write_buf; + void *read_buf = NULL; + struct user_sve_header *write_sve; + struct user_sve_header *read_sve; + size_t read_sve_size = 0; + unsigned int vq = sve_vq_from_vl(vl); + int ret, i; + size_t data_size; + int errors = 0; + + data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + data_size, vl); + return; + } + write_sve = write_buf; + + /* Set up some data and write it out */ + memset(write_sve, 0, data_size); + write_sve->size = data_size; + write_sve->vl = vl; + write_sve->flags = SVE_PT_REGS_SVE; + + for (i = 0; i < __SVE_NUM_ZREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)); + + for (i = 0; i < __SVE_NUM_PREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + SVE_PT_SVE_PREG_SIZE(vq)); + + fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); + fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); + + /* TODO: Generate a valid FFR pattern */ + + ret = set_sve(child, write_sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u data\n", vl); + goto out; + } + + /* Read the data back */ + if (!get_sve(child, (void **)&read_buf, &read_sve_size)) { + ksft_test_result_fail("Failed to read VL %u data\n", vl); + goto out; + } + read_sve = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_sve->size < write_sve->size) { + ksft_test_result_fail("Wrote %d bytes, only read %d\n", + write_sve->size, read_sve->size); + goto out_read; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)) != 0) { + printf("# Mismatch in %u Z%d\n", vl, i); + errors++; + } + } + + for (i = 0; i < __SVE_NUM_PREGS; i++) { + if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + SVE_PT_SVE_PREG_SIZE(vq)) != 0) { + printf("# Mismatch in %u P%d\n", vl, i); + errors++; + } + } + + check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), + read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors); + check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), + read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); + + ksft_test_result(errors == 0, "Set and get SVE data for VL %u\n", vl); + +out_read: + free(read_buf); +out: + free(write_buf); +} + +/* Validate attempting to set SVE data and read SVE data */ +static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) +{ + void *write_buf; + struct user_sve_header *write_sve; + unsigned int vq = sve_vq_from_vl(vl); + struct user_fpsimd_state fpsimd_state; + int ret, i; + size_t data_size; + int errors = 0; + + if (__BYTE_ORDER == __BIG_ENDIAN) { + ksft_test_result_skip("Big endian not supported\n"); + return; + } + + data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + data_size, vl); + return; + } + write_sve = write_buf; + + /* Set up some data and write it out */ + memset(write_sve, 0, data_size); + write_sve->size = data_size; + write_sve->vl = vl; + write_sve->flags = SVE_PT_REGS_SVE; + + for (i = 0; i < __SVE_NUM_ZREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)); + + fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); + fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); + + ret = set_sve(child, write_sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u data\n", vl); + goto out; + } + + /* Read the data back */ + if (get_fpsimd(child, &fpsimd_state)) { + ksft_test_result_fail("Failed to read VL %u FPSIMD data\n", + vl); + goto out; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + __uint128_t tmp = 0; + + /* + * Z regs are stored endianness invariant, this won't + * work for big endian + */ + memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + sizeof(tmp)); + + if (tmp != fpsimd_state.vregs[i]) { + printf("# Mismatch in FPSIMD for VL %u Z%d\n", vl, i); + errors++; + } + } + + check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), + &fpsimd_state.fpsr, &errors); + check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), + &fpsimd_state.fpcr, &errors); + + ksft_test_result(errors == 0, "Set and get FPSIMD data for VL %u\n", + vl); + +out: + free(write_buf); } static int do_parent(pid_t child) @@ -139,13 +442,8 @@ static int do_parent(pid_t child) pid_t pid; int status; siginfo_t si; - void *svebuf = NULL, *newsvebuf; - size_t svebufsz = 0, newsvebufsz; - struct user_sve_header *sve, *new_sve; - struct user_fpsimd_state *fpsimd; - unsigned int i, j; - unsigned char *p; - unsigned int vq; + unsigned int vq, vl; + bool vl_supported; /* Attach to the child */ while (1) { @@ -167,8 +465,6 @@ static int do_parent(pid_t child) if (WIFEXITED(status) || WIFSIGNALED(status)) ksft_exit_fail_msg("Child died unexpectedly\n"); - ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", - status); if (!WIFSTOPPED(status)) goto error; @@ -203,98 +499,27 @@ static int do_parent(pid_t child) } } - sve = get_sve(pid, &svebuf, &svebufsz); - if (!sve) { - int e = errno; + /* FPSIMD via SVE regset */ + ptrace_sve_fpsimd(child); - ksft_test_result_fail("get_sve: %s\n", strerror(errno)); - if (e == ESRCH) - goto disappeared; + /* prctl() flags */ + ptrace_set_get_inherit(child); - goto error; - } else { - ksft_test_result_pass("get_sve\n"); - } - - ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "FPSIMD registers\n"); - if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) - goto error; - - fpsimd = (struct user_fpsimd_state *)((char *)sve + - SVE_PT_FPSIMD_OFFSET); - for (i = 0; i < 32; ++i) { - p = (unsigned char *)&fpsimd->vregs[i]; - - for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) - p[j] = j; - } - - if (set_sve(pid, sve)) { - int e = errno; - - ksft_test_result_fail("set_sve(FPSIMD): %s\n", - strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } + /* Step through every possible VQ */ + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); - vq = sve_vq_from_vl(sve->vl); + /* First, try to set this vector length */ + ptrace_set_get_vl(child, vl, &vl_supported); - newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); - new_sve = newsvebuf = malloc(newsvebufsz); - if (!new_sve) { - errno = ENOMEM; - perror(NULL); - goto error; - } - - *new_sve = *sve; - new_sve->flags &= ~SVE_PT_REGS_MASK; - new_sve->flags |= SVE_PT_REGS_SVE; - memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), - 0, SVE_PT_SVE_ZREG_SIZE(vq)); - new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); - if (set_sve(pid, new_sve)) { - int e = errno; - - ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } - - new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); - if (!new_sve) { - int e = errno; - - ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } - - ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, - "SVE registers\n"); - if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) - goto error; - - dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); - - p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); - for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { - unsigned char expected = i; - - if (__BYTE_ORDER == __BIG_ENDIAN) - expected = sizeof fpsimd->vregs[0] - 1 - expected; - - ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); - if (p[i] != expected) - goto error; + /* If the VL is supported validate data set/get */ + if (vl_supported) { + ptrace_set_sve_get_sve_data(child, vl); + ptrace_set_sve_get_fpsimd_data(child, vl); + } else { + ksft_test_result_skip("set SVE get SVE for VL %d\n", vl); + ksft_test_result_skip("set SVE get FPSIMD for VL %d\n", vl); + } } ret = EXIT_SUCCESS; @@ -309,20 +534,16 @@ disappeared: int main(void) { int ret = EXIT_SUCCESS; - __uint128_t v[NR_VREGS]; pid_t child; + srandom(getpid()); + ksft_print_header(); - ksft_set_plan(20); + ksft_set_plan(EXPECTED_TESTS); if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) ksft_exit_skip("SVE not available\n"); - sve_store_patterns(v); - - if (!check_vregs(v)) - ksft_exit_fail_msg("Initial check_vregs() failed\n"); - child = fork(); if (!child) return do_child(); diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index e3e08d9c7020..f5b1b48ffff2 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -46,130 +46,6 @@ define_accessor getz, NZR, _sve_str_v define_accessor setp, NPR, _sve_ldr_p define_accessor getp, NPR, _sve_str_p -// Print a single character x0 to stdout -// Clobbers x0-x2,x8 -function putc - str x0, [sp, #-16]! - - mov x0, #1 // STDOUT_FILENO - mov x1, sp - mov x2, #1 - mov x8, #__NR_write - svc #0 - - add sp, sp, #16 - ret -endfunction - -// Print a NUL-terminated string starting at address x0 to stdout -// Clobbers x0-x3,x8 -function puts - mov x1, x0 - - mov x2, #0 -0: ldrb w3, [x0], #1 - cbz w3, 1f - add x2, x2, #1 - b 0b - -1: mov w0, #1 // STDOUT_FILENO - mov x8, #__NR_write - svc #0 - - ret -endfunction - -// Utility macro to print a literal string -// Clobbers x0-x4,x8 -.macro puts string - .pushsection .rodata.str1.1, "aMS", 1 -.L__puts_literal\@: .string "\string" - .popsection - - ldr x0, =.L__puts_literal\@ - bl puts -.endm - -// Print an unsigned decimal number x0 to stdout -// Clobbers x0-x4,x8 -function putdec - mov x1, sp - str x30, [sp, #-32]! // Result can't be > 20 digits - - mov x2, #0 - strb w2, [x1, #-1]! // Write the NUL terminator - - mov x2, #10 -0: udiv x3, x0, x2 // div-mod loop to generate the digits - msub x0, x3, x2, x0 - add w0, w0, #'0' - strb w0, [x1, #-1]! - mov x0, x3 - cbnz x3, 0b - - ldrb w0, [x1] - cbnz w0, 1f - mov w0, #'0' // Print "0" for 0, not "" - strb w0, [x1, #-1]! - -1: mov x0, x1 - bl puts - - ldr x30, [sp], #32 - ret -endfunction - -// Print an unsigned decimal number x0 to stdout, followed by a newline -// Clobbers x0-x5,x8 -function putdecn - mov x5, x30 - - bl putdec - mov x0, #'\n' - bl putc - - ret x5 -endfunction - -// Clobbers x0-x3,x8 -function puthexb - str x30, [sp, #-0x10]! - - mov w3, w0 - lsr w0, w0, #4 - bl puthexnibble - mov w0, w3 - - ldr x30, [sp], #0x10 - // fall through to puthexnibble -endfunction -// Clobbers x0-x2,x8 -function puthexnibble - and w0, w0, #0xf - cmp w0, #10 - blo 1f - add w0, w0, #'a' - ('9' + 1) -1: add w0, w0, #'0' - b putc -endfunction - -// x0=data in, x1=size in, clobbers x0-x5,x8 -function dumphex - str x30, [sp, #-0x10]! - - mov x4, x0 - mov x5, x1 - -0: subs x5, x5, #1 - b.lo 1f - ldrb w0, [x4], #1 - bl puthexb - b 0b - -1: ldr x30, [sp], #0x10 - ret -endfunction - // Declare some storate space to shadow the SVE register contents: .pushsection .text .data @@ -184,18 +60,6 @@ scratch: .space MAXVL_B .popsection -// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. -// Clobbers x0-x3 -function memcpy - cmp x2, #0 - b.eq 1f -0: ldrb w3, [x1], #1 - strb w3, [x0], #1 - subs x2, x2, #1 - b.ne 0b -1: ret -endfunction - // Generate a test pattern for storage in SVE registers // x0: pid (16 bits) // x1: register number (6 bits) @@ -316,33 +180,6 @@ function setup_ffr ret x4 endfunction -// Fill x1 bytes starting at x0 with 0xae (for canary purposes) -// Clobbers x1, x2. -function memfill_ae - mov w2, #0xae - b memfill -endfunction - -// Fill x1 bytes starting at x0 with 0. -// Clobbers x1, x2. -function memclr - mov w2, #0 -endfunction - // fall through to memfill - -// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 -// Clobbers x1 -function memfill - cmp x1, #0 - b.eq 1f - -0: strb w2, [x0], #1 - subs x1, x1, #1 - b.ne 0b - -1: ret -endfunction - // Trivial memory compare: compare x2 bytes starting at address x0 with // bytes starting at address x1. // Returns only if all bytes match; otherwise, the program is aborted. diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index c02071dcb563..272b888e018e 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -109,7 +109,7 @@ static int get_child_rdvl(struct vec_data *data) /* exec() a new binary which puts the VL on stdout */ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL); - fprintf(stderr, "execl(%s) failed: %d\n", + fprintf(stderr, "execl(%s) failed: %d (%s)\n", data->rdvl_binary, errno, strerror(errno)); exit(EXIT_FAILURE); @@ -180,7 +180,6 @@ static int file_read_integer(const char *name, int *val) static int file_write_integer(const char *name, int val) { FILE *f; - int ret; f = fopen(name, "w"); if (!f) { @@ -192,11 +191,6 @@ static int file_write_integer(const char *name, int val) fprintf(f, "%d", val); fclose(f); - if (ret < 0) { - ksft_test_result_fail("Error writing %d to %s\n", - val, name); - return -1; - } return 0; } @@ -335,12 +329,9 @@ static void prctl_set_same(struct vec_data *data) return; } - if (cur_vl != data->rdvl()) - ksft_test_result_pass("%s current VL is %d\n", - data->name, ret); - else - ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n", - data->name, ret, data->rdvl()); + ksft_test_result(cur_vl == data->rdvl(), + "%s set VL %d and have VL %d\n", + data->name, cur_vl, data->rdvl()); } /* Can we set a new VL for this process? */ @@ -549,6 +540,82 @@ static void prctl_set_onexec(struct vec_data *data) file_write_integer(data->default_vl_file, data->default_vl); } +/* For each VQ verify that setting via prctl() does the right thing */ +static void prctl_set_all_vqs(struct vec_data *data) +{ + int ret, vq, vl, new_vl; + int errors = 0; + + if (!data->min_vl || !data->max_vl) { + ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n", + data->name); + return; + } + + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* Attempt to set the VL */ + ret = prctl(data->prctl_set, vl); + if (ret < 0) { + errors++; + ksft_print_msg("%s prctl set failed for %d: %d (%s)\n", + data->name, vl, + errno, strerror(errno)); + continue; + } + + new_vl = ret & PR_SVE_VL_LEN_MASK; + + /* Check that we actually have the reported new VL */ + if (data->rdvl() != new_vl) { + ksft_print_msg("Set %s VL %d but RDVL reports %d\n", + data->name, new_vl, data->rdvl()); + errors++; + } + + /* Was that the VL we asked for? */ + if (new_vl == vl) + continue; + + /* Should round up to the minimum VL if below it */ + if (vl < data->min_vl) { + if (new_vl != data->min_vl) { + ksft_print_msg("%s VL %d returned %d not minimum %d\n", + data->name, vl, new_vl, + data->min_vl); + errors++; + } + + continue; + } + + /* Should round down to maximum VL if above it */ + if (vl > data->max_vl) { + if (new_vl != data->max_vl) { + ksft_print_msg("%s VL %d returned %d not maximum %d\n", + data->name, vl, new_vl, + data->max_vl); + errors++; + } + + continue; + } + + /* Otherwise we should've rounded down */ + if (!(new_vl < vl)) { + ksft_print_msg("%s VL %d returned %d, did not round down\n", + data->name, vl, new_vl); + errors++; + + continue; + } + } + + ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n", + data->name, errors); +} + typedef void (*test_type)(struct vec_data *); static const test_type tests[] = { @@ -561,10 +628,12 @@ static const test_type tests[] = { proc_write_max, prctl_get, + prctl_set_same, prctl_set, prctl_set_no_child, prctl_set_for_child, prctl_set_onexec, + prctl_set_all_vqs, }; int main(void) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 433f8bef261e..1dad8d617da8 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -9,8 +9,9 @@ test_tag FEATURE-DUMP.libbpf fixdep test_dev_cgroup -/test_progs* -!test_progs.h +/test_progs +/test_progs-no_alu32 +/test_progs-bpf_gcc test_verifier_log feature test_sock diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 799b88152e9e..54b0a41a3775 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -122,12 +122,15 @@ BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a ifneq ($(CROSS_COMPILE),) HOST_BUILD_DIR := $(BUILD_DIR)/host HOST_SCRATCH_DIR := $(OUTPUT)/host-tools +HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include else HOST_BUILD_DIR := $(BUILD_DIR) HOST_SCRATCH_DIR := $(SCRATCH_DIR) +HOST_INCLUDE_DIR := $(INCLUDE_DIR) endif HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids +RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -152,7 +155,7 @@ $(notdir $(TEST_GEN_PROGS) \ # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ - $(INCLUDE_DIR)) + $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) $(Q)mkdir -p $@ @@ -181,11 +184,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool -$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) - $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ - OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ - cp $(SCRATCH_DIR)/runqslower $@ +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ + BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/ \ + BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ + cp $(RUNQSLOWER_OUTPUT)runqslower $@ TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) @@ -209,7 +214,9 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ - prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install + LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ + LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin all: docs @@ -225,7 +232,7 @@ docs-clean: $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ - | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf + | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers @@ -233,7 +240,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ - | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf + | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ @@ -258,6 +265,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by @@ -269,7 +277,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/') +$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef # Determine target endianness. @@ -315,7 +323,9 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ - test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c + test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c +# Generate both light skeleton and libbpf skeleton for these +LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c SKEL_BLACKLIST += $$(LSKELS) test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o @@ -345,7 +355,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) -TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS)) +TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) @@ -394,7 +404,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) - $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) @@ -411,10 +421,9 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) - $$(shell ( cd $(TRUNNER_TESTS_DIR); \ - echo '/* Generated header, do not edit */'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ + $$(shell (echo '/* Generated header, do not edit */'; \ + sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p' \ + $(TRUNNER_TESTS_DIR)/*.c | sort ; \ ) > $$@) endif @@ -453,7 +462,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ + $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ endef @@ -513,20 +522,22 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner -$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h +$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h -$(OUTPUT)/bench.o: bench.h testing_helpers.h +$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h +$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ - $(OUTPUT)/bench_ringbufs.o + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 9b17f2867488..5e287e445f75 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -201,6 +201,20 @@ Without it, the error from compiling bpf selftests looks like: __ https://reviews.llvm.org/D93563 +btf_tag test and Clang version +============================== + +The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute. +It was introduced in `Clang 14`__. + +Without it, the btf_tag selftest will be skipped and you will observe: + +.. code-block:: console + + #<test_num> btf_tag:SKIP + +__ https://reviews.llvm.org/D111588 + Clang dependencies for static linking tests =========================================== @@ -228,3 +242,16 @@ To fix this issue, user newer libbpf. .. Links .. _clang reloc patch: https://reviews.llvm.org/D102712 .. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst + +Clang dependencies for the u32 spill test (xdpwall) +=================================================== +The xdpwall selftest requires a change in `Clang 14`__. + +Without it, the xdpwall selftest will fail and the error message +from running test_progs will look like: + +.. code-block:: console + + test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? unexpected error: -4007 + +__ https://reviews.llvm.org/D109073 diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 6ea15b93a2f8..cc4722f693e9 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -51,6 +51,35 @@ void setup_libbpf() fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + long total = res->false_hits + res->hits + res->drops; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + + printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", + res->false_hits, total, ((float)res->false_hits / total) * 100); +} + +void false_hits_report_final(struct bench_res res[], int res_cnt) +{ + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + total_hits += res[i].hits; + total_false_hits += res[i].false_hits; + total_drops += res[i].drops; + } + total_ops = total_hits + total_false_hits + total_drops; + + printf("Summary: %ld false hits of %ld total operations. ", + total_false_hits, total_ops); + printf("Percentage = %2.2f %%\n", + ((float)total_false_hits / total_ops) * 100); +} + void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) { double hits_per_sec, drops_per_sec; @@ -63,20 +92,22 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); - printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", - hits_per_sec, hits_per_prod, drops_per_sec); + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n", + hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; - double hits_mean = 0.0, drops_mean = 0.0; - double hits_stddev = 0.0, drops_stddev = 0.0; + double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0; + double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0; + double total_ops; for (i = 0; i < res_cnt; i++) { hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); } + total_ops_mean = hits_mean + drops_mean; if (res_cnt > 1) { for (i = 0; i < res_cnt; i++) { @@ -86,14 +117,21 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) drops_stddev += (drops_mean - res[i].drops / 1000000.0) * (drops_mean - res[i].drops / 1000000.0) / (res_cnt - 1.0); + total_ops = res[i].hits + res[i].drops; + total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) * + (total_ops_mean - total_ops / 1000000.0) / + (res_cnt - 1.0); } hits_stddev = sqrt(hits_stddev); drops_stddev = sqrt(drops_stddev); + total_ops_stddev = sqrt(total_ops_stddev); } printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", hits_mean, hits_stddev, hits_mean / env.producer_cnt); - printf("drops %8.3lf \u00B1 %5.3lfM/s\n", + printf("drops %8.3lf \u00B1 %5.3lfM/s, ", drops_mean, drops_stddev); + printf("total operations %8.3lf \u00B1 %5.3lfM/s\n", + total_ops_mean, total_ops_stddev); } const char *argp_program_version = "benchmark"; @@ -132,9 +170,11 @@ static const struct argp_option opts[] = { }; extern struct argp bench_ringbufs_argp; +extern struct argp bench_bloom_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, {}, }; @@ -323,6 +363,11 @@ extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; extern const struct bench bench_pb_custom; +extern const struct bench bench_bloom_lookup; +extern const struct bench bench_bloom_update; +extern const struct bench bench_bloom_false_positive; +extern const struct bench bench_hashmap_without_bloom; +extern const struct bench bench_hashmap_with_bloom; static const struct bench *benchs[] = { &bench_count_global, @@ -344,6 +389,11 @@ static const struct bench *benchs[] = { &bench_rb_custom, &bench_pb_libbpf, &bench_pb_custom, + &bench_bloom_lookup, + &bench_bloom_update, + &bench_bloom_false_positive, + &bench_hashmap_without_bloom, + &bench_hashmap_with_bloom, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index c1f48a473b02..624c6b11501f 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -33,6 +33,7 @@ struct env { struct bench_res { long hits; long drops; + long false_hits; }; struct bench { @@ -56,6 +57,8 @@ extern const struct bench *bench; void setup_libbpf(); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); +void false_hits_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns() { struct timespec t; diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 000000000000..6eeeed2913e6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include <linux/log2.h> +#include <pthread.h> +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + bool use_array_map; + bool use_hashmap; + bool hashmap_use_bloom; + bool count_false_hits; + + struct bloom_filter_bench *skel; + + int bloom_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done_cv; + bool map_done; + bool map_prepare_err; + + __u32 next_map_idx; +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done_cv = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u8 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_ENTRIES: + args.nr_entries = strtol(arg, NULL, 10); + if (args.nr_entries == 0) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + break; + case ARG_NR_HASH_FUNCS: + args.nr_hash_funcs = strtol(arg, NULL, 10); + if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } + break; + case ARG_VALUE_SIZE: + args.value_size = strtol(arg, NULL, 10); + if (args.value_size < 2 || args.value_size > 256) { + fprintf(stderr, + "Invalid value size. Must be between 2 and 256 bytes"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bloom_map_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, + "The bloom filter benchmarks do not support multi-consumer use\n"); + exit(1); + } +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +static void *map_prepare_thread(void *arg) +{ + __u32 val_size, i; + void *val = NULL; + int err; + + val_size = args.value_size; + val = malloc(val_size); + if (!val) { + ctx.map_prepare_err = true; + goto done; + } + + while (true) { + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); + if (i > args.nr_entries) + break; + +again: + /* Populate hashmap, bloom filter map, and array map with the same + * random values + */ + err = syscall(__NR_getrandom, val, val_size, 0); + if (err != val_size) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to get random value: %d\n", -errno); + break; + } + + if (ctx.use_hashmap) { + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); + if (err) { + if (err != -EEXIST) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to hashmap: %d\n", + -errno); + break; + } + goto again; + } + } + + i--; + + if (ctx.use_array_map) { + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to array map: %d\n", -errno); + break; + } + } + + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) + continue; + + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, + "failed to add elem to bloom filter map: %d\n", -errno); + break; + } + } +done: + pthread_mutex_lock(&ctx.map_done_mtx); + ctx.map_done = true; + pthread_cond_signal(&ctx.map_done_cv); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (val) + free(val); + + return NULL; +} + +static void populate_maps(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + pthread_t map_thread; + int i, err, nr_rand_bytes; + + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); + + for (i = 0; i < nr_cpus; i++) { + err = pthread_create(&map_thread, NULL, map_prepare_thread, + NULL); + if (err) { + fprintf(stderr, "failed to create pthread: %d\n", -errno); + exit(1); + } + } + + pthread_mutex_lock(&ctx.map_done_mtx); + while (!ctx.map_done) + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (ctx.map_prepare_err) + exit(1); + + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, + ctx.skel->rodata->nr_rand_bytes, 0); + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { + fprintf(stderr, "failed to get random bytes\n"); + exit(1); + } +} + +static void check_args(void) +{ + if (args.value_size < 8) { + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); + + if (args.nr_entries > nr_unique_entries) { + fprintf(stderr, + "Not enough unique values for the nr_entries requested\n"); + exit(1); + } + } +} + +static struct bloom_filter_bench *setup_skeleton(void) +{ + struct bloom_filter_bench *skel; + + check_args(); + + setup_libbpf(); + + skel = bloom_filter_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; + skel->rodata->count_false_hits = ctx.count_false_hits; + + /* Resize number of entries */ + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); + + /* Set value size */ + bpf_map__set_value_size(skel->maps.array_map, args.value_size); + + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); + + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); + + /* For the hashmap, we use the value as the key as well */ + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); + + skel->bss->value_size = args.value_size; + + /* Set number of hash functions */ + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); + + if (bloom_filter_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static void bloom_lookup_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void bloom_update_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_update); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void false_positive_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + ctx.count_false_hits = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_with_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_no_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; + static unsigned long last_hits, last_drops, last_false_hits; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int hit_key, drop_key, false_hit_key; + int i; + + hit_key = ctx.skel->rodata->hit_key; + drop_key = ctx.skel->rodata->drop_key; + false_hit_key = ctx.skel->rodata->false_hit_key; + + if (ctx.skel->bss->error != 0) { + fprintf(stderr, "error (%d) when searching the bloom filter\n", + ctx.skel->bss->error); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) { + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; + + total_hits += s->stats[hit_key]; + total_drops += s->stats[drop_key]; + total_false_hits += s->stats[false_hit_key]; + } + + res->hits = total_hits - last_hits; + res->drops = total_drops - last_drops; + res->false_hits = total_false_hits - last_false_hits; + + last_hits = total_hits; + last_drops = total_drops; + last_false_hits = total_false_hits; +} + +static void *consumer(void *input) +{ + return NULL; +} + +const struct bench bench_bloom_lookup = { + .name = "bloom-lookup", + .validate = validate, + .setup = bloom_lookup_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_update = { + .name = "bloom-update", + .validate = validate, + .setup = bloom_update_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_false_positive = { + .name = "bloom-false-positive", + .validate = validate, + .setup = false_positive_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = false_hits_report_progress, + .report_final = false_hits_report_final, +}; + +const struct bench bench_hashmap_without_bloom = { + .name = "hashmap-without-bloom", + .validate = validate, + .setup = hashmap_no_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_hashmap_with_bloom = { + .name = "hashmap-with-bloom", + .validate = validate, + .setup = hashmap_with_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh new file mode 100755 index 000000000000..8ffd385ab2f4 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Bloom filter map" +for v in 2 4 8 16 40; do +for t in 1 4 8 12 16; do +for h in {1..10}; do +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize "Lookups, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" + printf "\t" + summarize "Updates, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" + printf "\t" + summarize_percentage "False positive rate: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" + done + printf "\n" +done +done +done + +header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)" +for v in 2 4 8 16 40; do +for h in {1..10}; do +subtitle "value_size: $v, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize_total "Hashmap without bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" + printf "\t" + summarize_total "Hashmap with bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" + done + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index af4aa04caba6..ada028aa9007 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -1,34 +1,8 @@ #!/bin/bash -set -eufo pipefail - -RUN_BENCH="sudo ./bench -w3 -d10 -a" - -function hits() -{ - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} - -function drops() -{ - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} +source ./benchs/run_common.sh -function header() -{ - local len=${#1} - - printf "\n%s\n" "$1" - for i in $(seq 1 $len); do printf '='; done - printf '\n' -} - -function summarize() -{ - bench="$1" - summary=$(echo $2 | tail -n1) - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" -} +set -eufo pipefail header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh new file mode 100644 index 000000000000..9a16be78b180 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function subtitle() +{ + local len=${#1} + printf "\t%s\n" "$1" +} + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function percentage() +{ + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" +} + +function total() +{ + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +function summarize_percentage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" +} + +function summarize_total() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(total $summary)" +} diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 89c6d58e5dd6..11ee801e75e7 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -34,6 +34,21 @@ DECLARE_TRACE(bpf_testmod_test_write_bare, TP_ARGS(task, ctx) ); +#undef BPF_TESTMOD_DECLARE_TRACE +#ifdef DECLARE_TRACE_WRITABLE +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) +#else +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare, + TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx), + TP_ARGS(ctx), + sizeof(struct bpf_testmod_test_writable_ctx) +); + #endif /* _BPF_TESTMOD_EVENTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 141d8da687d2..5d52ea2768df 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#include <linux/btf.h> +#include <linux/btf_ids.h> #include <linux/error-injection.h> #include <linux/init.h> #include <linux/module.h> @@ -13,6 +15,24 @@ DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; +noinline void +bpf_testmod_test_mod_kfunc(int i) +{ + *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; +} + +noinline int bpf_testmod_loop_test(int n) +{ + int i, sum = 0; + + /* the primary goal of this test is to test LBR. Create a lot of + * branches in the function, so we can catch it easily. + */ + for (i = 0; i < n; i++) + sum += i; + return sum; +} + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -24,7 +44,21 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .len = len, }; - trace_bpf_testmod_test_read(current, &ctx); + /* This is always true. Use the check to make sure the compiler + * doesn't remove bpf_testmod_loop_test. + */ + if (bpf_testmod_loop_test(101) > 100) + trace_bpf_testmod_test_read(current, &ctx); + + /* Magic number to enable writable tp */ + if (len == 64) { + struct bpf_testmod_test_writable_ctx writable = { + .val = 1024, + }; + trace_bpf_testmod_test_writable_bare(&writable); + if (writable.early_ret) + return snprintf(buf, len, "%d\n", writable.val); + } return -EIO; /* always fail */ } @@ -55,13 +89,26 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +BTF_SET_START(bpf_testmod_kfunc_ids) +BTF_ID(func, bpf_testmod_test_mod_kfunc) +BTF_SET_END(bpf_testmod_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); + static int bpf_testmod_init(void) { - return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + int ret; + + ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + if (ret) + return ret; + register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); + return 0; } static void bpf_testmod_exit(void) { + unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } @@ -71,4 +118,3 @@ module_exit(bpf_testmod_exit); MODULE_AUTHOR("Andrii Nakryiko"); MODULE_DESCRIPTION("BPF selftests module"); MODULE_LICENSE("Dual BSD/GPL"); - diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index b3892dc40111..0d71e2607832 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -17,4 +17,9 @@ struct bpf_testmod_test_write_ctx { size_t len; }; +struct bpf_testmod_test_writable_ctx { + bool early_ret; + int val; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index b692e6ead9b5..b5b6b013a245 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -24,11 +24,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_DATASEC) + if (kind > BTF_KIND_DECL_TAG) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -177,6 +178,10 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) case BTF_KIND_FLOAT: fprintf(out, " size=%u", t->size); break; + case BTF_KIND_DECL_TAG: + fprintf(out, " type_id=%u component_idx=%d", + t->type, btf_decl_tag(t)->component_idx); + break; default: break; } @@ -210,7 +215,7 @@ int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]) int i; bool ok = true; - ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types"); + ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types"); for (i = 1; i <= nr_types; i++) { if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump")) @@ -249,7 +254,7 @@ const char *btf_type_c_dump(const struct btf *btf) return NULL; } - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); if (err) { fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f3daa44a8266..9d59c3990ca8 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -33,10 +33,9 @@ #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" - #define format_cgroup_path(buf, path) \ - snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ - CGROUP_WORK_DIR, path) + snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ + CGROUP_WORK_DIR, getpid(), path) #define format_classid_path(buf) \ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 629da3854b3e..fcc9cb91b211 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,4 +26,4 @@ int join_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -#endif /* __CGROUP_HELPERS_H */ +#endif /* __CGROUP_HELPERS_H */
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 3fd83b9dc1bf..87fd1aa323a9 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -17,7 +17,7 @@ const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; const char *cfg_map_name = "jmp_table"; bool cfg_attach = true; -char *cfg_section_name; +char *cfg_prog_name; char *cfg_path_name; static void load_and_attach_program(void) @@ -25,7 +25,11 @@ static void load_and_attach_program(void) int prog_fd, ret; struct bpf_object *obj; - ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (ret) + error(1, 0, "failed to enable libbpf strict mode: %d", ret); + + ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); @@ -75,15 +79,15 @@ static void parse_opts(int argc, char **argv) break; case 'p': if (cfg_path_name) - error(1, 0, "only one prog name can be given"); + error(1, 0, "only one path can be given"); cfg_path_name = optarg; break; case 's': - if (cfg_section_name) - error(1, 0, "only one section can be given"); + if (cfg_prog_name) + error(1, 0, "only one prog can be given"); - cfg_section_name = optarg; + cfg_prog_name = optarg; break; } } @@ -94,7 +98,7 @@ static void parse_opts(int argc, char **argv) if (cfg_attach && !cfg_path_name) error(1, 0, "must provide a path to the BPF program"); - if (cfg_attach && !cfg_section_name) + if (cfg_attach && !cfg_prog_name) error(1, 0, "must provide a section name"); } diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index 7290401ec172..9d0acc2fc6cc 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -7,7 +7,7 @@ static inline int bpf_flow_load(struct bpf_object **obj, const char *path, - const char *section_name, + const char *prog_name, const char *map_name, const char *keys_map_name, int *prog_fd, @@ -23,13 +23,7 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (ret) return ret; - main_prog = NULL; - bpf_object__for_each_program(prog, *obj) { - if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { - main_prog = prog; - break; - } - } + main_prog = bpf_object__find_program_by_name(*obj, prog_name); if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index ba0e1efe5a45..0f9525293881 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -4,13 +4,13 @@ #include "atomics.lskel.h" -static void test_add(struct atomics *skel) +static void test_add(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__add__attach(skel); + link_fd = atomics_lskel__add__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; @@ -36,13 +36,13 @@ cleanup: close(link_fd); } -static void test_sub(struct atomics *skel) +static void test_sub(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__sub__attach(skel); + link_fd = atomics_lskel__sub__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; @@ -69,13 +69,13 @@ cleanup: close(link_fd); } -static void test_and(struct atomics *skel) +static void test_and(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__and__attach(skel); + link_fd = atomics_lskel__and__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; @@ -97,13 +97,13 @@ cleanup: close(link_fd); } -static void test_or(struct atomics *skel) +static void test_or(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__or__attach(skel); + link_fd = atomics_lskel__or__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; @@ -126,13 +126,13 @@ cleanup: close(link_fd); } -static void test_xor(struct atomics *skel) +static void test_xor(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__xor__attach(skel); + link_fd = atomics_lskel__xor__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; @@ -154,13 +154,13 @@ cleanup: close(link_fd); } -static void test_cmpxchg(struct atomics *skel) +static void test_cmpxchg(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__cmpxchg__attach(skel); + link_fd = atomics_lskel__cmpxchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; @@ -183,13 +183,13 @@ cleanup: close(link_fd); } -static void test_xchg(struct atomics *skel) +static void test_xchg(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__xchg__attach(skel); + link_fd = atomics_lskel__xchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; @@ -212,10 +212,10 @@ cleanup: void test_atomics(void) { - struct atomics *skel; + struct atomics_lskel *skel; __u32 duration = 0; - skel = atomics__open_and_load(); + skel = atomics_lskel__open_and_load(); if (CHECK(!skel, "skel_load", "atomics skeleton failed\n")) return; @@ -225,6 +225,7 @@ void test_atomics(void) test__skip(); goto cleanup; } + skel->bss->pid = getpid(); if (test__start_subtest("add")) test_add(skel); @@ -242,5 +243,5 @@ void test_atomics(void) test_xchg(skel); cleanup: - atomics__destroy(skel); + atomics_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index bf307bb9e446..d0bd51eb23c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -5,6 +5,11 @@ /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); +/* attach point */ +static void method(void) { + return ; +} + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -14,12 +19,26 @@ void test_attach_probe(void) struct test_attach_probe* skel; size_t uprobe_offset; ssize_t base_addr, ref_ctr_offset; + bool legacy; + + /* Check if new-style kprobe/uprobe API is supported. + * Kernels that support new FD-based kprobe and uprobe BPF attachment + * through perf_event_open() syscall expose + * /sys/bus/event_source/devices/kprobe/type and + * /sys/bus/event_source/devices/uprobe/type files, respectively. They + * contain magic numbers that are passed as "type" field of + * perf_event_attr. Lack of such file in the system indicates legacy + * kernel with old-style kprobe/uprobe attach interface through + * creating per-probe event through tracefs. For such cases + * ref_ctr_offset feature is not supported, so we don't test it. + */ + legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; base_addr = get_base_addr(); if (CHECK(base_addr < 0, "get_base_addr", "failed to find base addr: %zd", base_addr)) return; - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); + uprobe_offset = get_uprobe_offset(&method, base_addr); ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) @@ -45,10 +64,11 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; - ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); + if (!legacy) + ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); uprobe_opts.retprobe = false; - uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */, "/proc/self/exe", @@ -58,11 +78,12 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uprobe = uprobe_link; - ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); + if (!legacy) + ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ uprobe_opts.retprobe = true; - uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */, "/proc/self/exe", @@ -82,7 +103,7 @@ void test_attach_probe(void) goto cleanup; /* trigger & validate uprobe & uretprobe */ - get_base_addr(); + method(); if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", "wrong uprobe res: %d\n", skel->bss->uprobe_res)) diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c new file mode 100644 index 000000000000..be73e3de6668 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <sys/syscall.h> +#include <test_progs.h> +#include "bloom_filter_map.skel.h" + +static void test_fail_cases(void) +{ + __u32 value; + int fd, err; + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) + close(fd); + + /* Invalid value size */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) + close(fd); + + /* Invalid max entries size */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 0, 0); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) + close(fd); + + /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, + BPF_F_NO_PREALLOC); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) + close(fd); + + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) + return; + + /* Test invalid flags */ + err = bpf_map_update_elem(fd, NULL, &value, -1); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, 10000); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + close(fd); +} + +static void test_success_cases(void) +{ + char value[11]; + int fd, err; + + /* Create a map */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, + BPF_F_ZERO_SEED | BPF_F_NUMA_NODE); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case")) + return; + + /* Add a value to the bloom filter */ + err = bpf_map_update_elem(fd, NULL, &value, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem bloom filter success case")) + goto done; + + /* Lookup a value in the bloom filter */ + err = bpf_map_lookup_elem(fd, NULL, &value); + ASSERT_OK(err, "bpf_map_update_elem bloom filter success case"); + +done: + close(fd); +} + +static void check_bloom(struct bloom_filter_map *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.check_bloom); + if (!ASSERT_OK_PTR(link, "link")) + return; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); +} + +static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals, + __u32 nr_rand_vals) +{ + int outer_map_fd, inner_map_fd, err, i, key = 0; + struct bpf_link *link; + + /* Create a bloom filter map that will be used as the inner map */ + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals), + nr_rand_vals, 0); + if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map")) + return; + + for (i = 0; i < nr_rand_vals; i++) { + err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to inner_map_fd")) + goto done; + } + + /* Add the bloom filter map to the outer map */ + outer_map_fd = bpf_map__fd(skel->maps.outer_map); + err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY); + if (!ASSERT_OK(err, "Add bloom filter map to outer map")) + goto done; + + /* Attach the bloom_filter_inner_map prog */ + link = bpf_program__attach(skel->progs.inner_map); + if (!ASSERT_OK_PTR(link, "link")) + goto delete_inner_map; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); + +delete_inner_map: + /* Ensure the inner bloom filter map can be deleted */ + err = bpf_map_delete_elem(outer_map_fd, &key); + ASSERT_OK(err, "Delete inner bloom filter map"); + +done: + close(inner_map_fd); +} + +static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals, + __u32 *out_nr_rand_vals) +{ + struct bloom_filter_map *skel; + int random_data_fd, bloom_fd; + __u32 *rand_vals = NULL; + __u32 map_size, val; + int err, i; + + /* Set up a bloom filter map skeleton */ + skel = bloom_filter_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load")) + return -EINVAL; + + /* Set up rand_vals */ + map_size = bpf_map__max_entries(skel->maps.map_random_data); + rand_vals = malloc(sizeof(*rand_vals) * map_size); + if (!rand_vals) { + err = -ENOMEM; + goto error; + } + + /* Generate random values and populate both skeletons */ + random_data_fd = bpf_map__fd(skel->maps.map_random_data); + bloom_fd = bpf_map__fd(skel->maps.map_bloom); + for (i = 0; i < map_size; i++) { + val = rand(); + + err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_random_data")) + goto error; + + err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_bloom")) + goto error; + + rand_vals[i] = val; + } + + *out_skel = skel; + *out_rand_vals = rand_vals; + *out_nr_rand_vals = map_size; + + return 0; + +error: + bloom_filter_map__destroy(skel); + if (rand_vals) + free(rand_vals); + return err; +} + +void test_bloom_filter_map(void) +{ + __u32 *rand_vals, nr_rand_vals; + struct bloom_filter_map *skel; + int err; + + test_fail_cases(); + test_success_cases(); + + err = setup_progs(&skel, &rand_vals, &nr_rand_vals); + if (err) + return; + + test_inner_map(skel, rand_vals, nr_rand_vals); + free(rand_vals); + + check_bloom(skel); + + bloom_filter_map__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 77ac24b191d4..9454331aaf85 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -589,7 +589,7 @@ out: static void test_bpf_hash_map(void) { - __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + __u32 expected_key_a = 0, expected_key_b = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; @@ -638,7 +638,6 @@ static void test_bpf_hash_map(void) val = i + 4; expected_key_a += key.a; expected_key_b += key.b; - expected_key_c += key.c; expected_val += val; err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); @@ -685,7 +684,7 @@ out: static void test_bpf_percpu_hash_map(void) { - __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + __u32 expected_key_a = 0, expected_key_b = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; @@ -722,7 +721,6 @@ static void test_bpf_percpu_hash_map(void) key.c = i + 3; expected_key_a += key.a; expected_key_b += key.b; - expected_key_c += key.c; for (j = 0; j < bpf_num_possible_cpus(); j++) { *(__u32 *)(val + j * 8) = i + j; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c index 85babb0487b3..b52ff8ce34db 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -179,7 +179,7 @@ done: free_fds(est_fds, nr_est); } -void test_bpf_iter_setsockopt(void) +void serial_test_bpf_iter_setsockopt(void) { struct bpf_iter_setsockopt *iter_skel = NULL; struct bpf_cubic *cubic_skel = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 284d5921c345..eb8eeebe6935 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -3,7 +3,7 @@ #define nr_iters 2 -void test_bpf_obj_id(void) +void serial_test_bpf_obj_id(void) { const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 3d002c245d2b..27f5d8ea7964 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -39,82 +39,171 @@ struct scale_test_def { bool fails; }; -void test_bpf_verif_scale(void) -{ - struct scale_test_def tests[] = { - { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ }, - - { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS }, - { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, - { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, - - { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* full unroll by llvm */ - { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* partial unroll. llvm will unroll loop ~150 times. - * C loop count -> 600. - * Asm loop count -> 4. - * 16k insns in loop body. - * Total of 5 such loops. Total program size ~82k insns. - */ - { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* no unroll at all. - * C loop count -> 600. - * ASM loop count -> 600. - * ~110 insns in loop body. - * Total of 5 such loops. Total program size ~1500 insns. - */ - { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, - { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, - { "loop6.o", BPF_PROG_TYPE_KPROBE }, - - /* partial unroll. 19k insn in a loop. - * Total program size 20.8k insn. - * ~350k processed_insns - */ - { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* no unroll, tiny loops */ - { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* non-inlined subprogs */ - { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - - { "test_xdp_loop.o", BPF_PROG_TYPE_XDP }, - { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL }, - }; +static void scale_test(const char *file, + enum bpf_prog_type attach_type, + bool should_fail) +{ libbpf_print_fn_t old_print_fn = NULL; - int err, i; + int err; if (env.verifier_stats) { test__force_log(); old_print_fn = libbpf_set_print(libbpf_debug_print); } - for (i = 0; i < ARRAY_SIZE(tests); i++) { - const struct scale_test_def *test = &tests[i]; - - if (!test__start_subtest(test->file)) - continue; - - err = check_load(test->file, test->attach_type); - CHECK_FAIL(err && !test->fails); - } + err = check_load(file, attach_type); + if (should_fail) + ASSERT_ERR(err, "expect_error"); + else + ASSERT_OK(err, "expect_success"); if (env.verifier_stats) libbpf_set_print(old_print_fn); } + +void test_verif_scale1() +{ + scale_test("test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale2() +{ + scale_test("test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale3() +{ + scale_test("test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_pyperf_global() +{ + scale_test("pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf_subprogs() +{ + scale_test("pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf50() +{ + /* full unroll by llvm */ + scale_test("pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf100() +{ + /* full unroll by llvm */ + scale_test("pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf180() +{ + /* full unroll by llvm */ + scale_test("pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf600() +{ + /* partial unroll. llvm will unroll loop ~150 times. + * C loop count -> 600. + * Asm loop count -> 4. + * 16k insns in loop body. + * Total of 5 such loops. Total program size ~82k insns. + */ + scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf600_nounroll() +{ + /* no unroll at all. + * C loop count -> 600. + * ASM loop count -> 600. + * ~110 insns in loop body. + * Total of 5 such loops. Total program size ~1500 insns. + */ + scale_test("pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop1() +{ + scale_test("loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop2() +{ + scale_test("loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop3_fail() +{ + scale_test("loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */); +} + +void test_verif_scale_loop4() +{ + scale_test("loop4.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_loop5() +{ + scale_test("loop5.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_loop6() +{ + scale_test("loop6.o", BPF_PROG_TYPE_KPROBE, false); +} + +void test_verif_scale_strobemeta() +{ + /* partial unroll. 19k insn in a loop. + * Total program size 20.8k insn. + * ~350k processed_insns + */ + scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_nounroll1() +{ + /* no unroll, tiny loops */ + scale_test("strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_nounroll2() +{ + /* no unroll, tiny loops */ + scale_test("strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_subprogs() +{ + /* non-inlined subprogs */ + scale_test("strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_sysctl_loop1() +{ + scale_test("test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); +} + +void test_verif_scale_sysctl_loop2() +{ + scale_test("test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); +} + +void test_verif_scale_xdp_loop() +{ + scale_test("test_xdp_loop.o", BPF_PROG_TYPE_XDP, false); +} + +void test_verif_scale_seg6_loop() +{ + scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false); +} + +void test_verif_twfw() +{ + scale_test("twfw.o", BPF_PROG_TYPE_CGROUP_SKB, false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 649f87382c8d..ac596cb06e40 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -39,8 +39,8 @@ static bool always_log; #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f -#define NAME_NTH(N) (0xffff0000 | N) -#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) +#define NAME_NTH(N) (0xfffe0000 | N) +#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xfffe0000) #define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) #define MAX_NR_RAW_U32 1024 @@ -3661,6 +3661,285 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid type_size", }, +{ + .descr = "decl_tag test #1, struct/member, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(0, 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 32), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #2, union/member, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_UNION_ENC(NAME_TBD, 2, 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #3, variable, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_VAR_ENC(NAME_TBD, 1, 1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0global\0tag1\0tag2"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #4, func/parameter, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #5, invalid value", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_DECL_TAG_ENC(0, 2, -1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid value", +}, +{ + .descr = "decl_tag test #6, invalid target type", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_DECL_TAG_ENC(NAME_TBD, 1, -1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type", +}, +{ + .descr = "decl_tag test #7, invalid vlen", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag1"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "vlen != 0", +}, +{ + .descr = "decl_tag test #8, invalid kflag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag1"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, +{ + .descr = "decl_tag test #9, var, invalid component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, +{ + .descr = "decl_tag test #10, struct member, invalid component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(0, 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 32), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0m1\0m2\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, +{ + .descr = "decl_tag test #11, func parameter, invalid component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_DECL_TAG_ENC(NAME_TBD, 3, 2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, +{ + .descr = "decl_tag test #12, < -1 component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_DECL_TAG_ENC(NAME_TBD, 3, -2), + BTF_END_RAW, + }, + BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, +{ + .descr = "decl_tag test #13, typedef, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #14, typedef, invalid component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -4268,7 +4547,7 @@ static void do_test_file(unsigned int test_num) if (CHECK(err, "obj: %d", err)) return; - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (CHECK(!prog, "Cannot find bpf_prog")) { err = -1; goto done; @@ -6421,27 +6700,33 @@ const struct btf_dedup_test dedup_tests[] = { BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ - BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */ + BTF_MEMBER_ENC(NAME_NTH(8), 15, 672), /* float d; */ /* ptr -> [3] struct s */ BTF_PTR_ENC(3), /* [4] */ /* ptr -> [6] const int */ BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ + /* tag -> [3] struct s */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ + /* tag -> [3] struct s, member 1 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ /* full copy of the above */ - BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ - BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ - BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */ - BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), - BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), - BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), - BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), - BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), - BTF_PTR_ENC(9), /* [10] */ - BTF_PTR_ENC(12), /* [11] */ - BTF_CONST_ENC(7), /* [12] */ - BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [9] */ + BTF_TYPE_ARRAY_ENC(9, 9, 16), /* [10] */ + BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [11] */ + BTF_MEMBER_ENC(NAME_NTH(3), 12, 0), + BTF_MEMBER_ENC(NAME_NTH(4), 13, 64), + BTF_MEMBER_ENC(NAME_NTH(5), 10, 128), + BTF_MEMBER_ENC(NAME_NTH(6), 9, 640), + BTF_MEMBER_ENC(NAME_NTH(8), 15, 672), + BTF_PTR_ENC(11), /* [12] */ + BTF_PTR_ENC(14), /* [13] */ + BTF_CONST_ENC(9), /* [14] */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [15] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */ BTF_END_RAW, }, BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"), @@ -6458,14 +6743,16 @@ const struct btf_dedup_test dedup_tests[] = { BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ - BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */ + BTF_MEMBER_ENC(NAME_NTH(4), 9, 672), /* float d; */ /* ptr -> [3] struct s */ BTF_PTR_ENC(3), /* [4] */ /* ptr -> [6] const int */ BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ - BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [9] */ BTF_END_RAW, }, BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"), @@ -6590,9 +6877,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), }, .expect = { .raw_types = { @@ -6616,9 +6906,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), }, .opts = { .dont_resolve_fwds = false, @@ -6767,6 +7060,185 @@ const struct btf_dedup_test dedup_tests[] = { .dedup_table_size = 1 }, }, +{ + .descr = "dedup: func/func_arg/var tags", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* static int t */ + BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */ + /* void f(int a1, int a2) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), + BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ + /* tag -> t */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ + /* tag -> func */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */ + /* tag -> func arg a1 */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), + BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: func/func_param tags", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* void f(int a1, int a2) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), + BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */ + /* void f(int a1, int a2) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), + BTF_FUNC_ENC(NAME_NTH(3), 4), /* [5] */ + /* tag -> f: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */ + /* tag -> f/a2: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */ + /* tag -> f: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */ + /* tag -> f/a2: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), + BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: struct/struct_member tags", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), + /* tag -> t: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + /* tag -> t/m2: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ + /* tag -> t: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */ + /* tag -> t/m2: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), + BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: typedef tags", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [3] */ + /* tag -> t: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [5] */ + /* tag -> t: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, }; @@ -6801,6 +7273,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); + case BTF_KIND_DECL_TAG: + return base_size + sizeof(struct btf_decl_tag); default: fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); return -EINVAL; @@ -6871,8 +7345,8 @@ static void do_test_dedup(unsigned int test_num) goto done; } - test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); - expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); + test_btf_data = btf__raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size); if (CHECK(test_btf_size != expect_btf_size, "test_btf_size:%u != expect_btf_size:%u", test_btf_size, expect_btf_size)) { @@ -6926,8 +7400,8 @@ static void do_test_dedup(unsigned int test_num) expect_str_cur += expect_len + 1; } - test_nr_types = btf__get_nr_types(test_btf); - expect_nr_types = btf__get_nr_types(expect_btf); + test_nr_types = btf__type_cnt(test_btf); + expect_nr_types = btf__type_cnt(expect_btf); if (CHECK(test_nr_types != expect_nr_types, "test_nr_types:%u != expect_nr_types:%u", test_nr_types, expect_nr_types)) { @@ -6935,7 +7409,7 @@ static void do_test_dedup(unsigned int test_num) goto done; } - for (i = 1; i <= test_nr_types; i++) { + for (i = 1; i < test_nr_types; i++) { const struct btf_type *test_type, *expect_type; int test_size, expect_size; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 52ccf0cf35e1..aa76360d8f49 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -27,7 +27,7 @@ static struct btf_dump_test_case { static int btf_dump_all_types(const struct btf *btf, const struct btf_dump_opts *opts) { - size_t type_cnt = btf__get_nr_types(btf); + size_t type_cnt = btf__type_cnt(btf); struct btf_dump *d; int err = 0, id; @@ -36,7 +36,7 @@ static int btf_dump_all_types(const struct btf *btf, if (err) return err; - for (id = 1; id <= type_cnt; id++) { + for (id = 1; id < type_cnt; id++) { err = btf_dump__dump_type(d, id); if (err) goto done; @@ -133,7 +133,7 @@ static char *dump_buf; static size_t dump_buf_sz; static FILE *dump_buf_file; -void test_btf_dump_incremental(void) +static void test_btf_dump_incremental(void) { struct btf *btf = NULL; struct btf_dump *d = NULL; @@ -171,7 +171,7 @@ void test_btf_dump_incremental(void) err = btf__add_field(btf, "x", 2, 0, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -210,7 +210,7 @@ void test_btf_dump_incremental(void) err = btf__add_field(btf, "s", 3, 32, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -358,12 +358,27 @@ static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1); #ifdef __SIZEOF_INT128__ - TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT, - "(__int128)0xffffffffffffffff", - 0xffffffffffffffff); - ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str, - "(__int128)0xfffffffffffffffffffffffffffffffe"), - "dump __int128"); + /* gcc encode unsigned __int128 type with name "__int128 unsigned" in dwarf, + * and clang encode it with name "unsigned __int128" in dwarf. + * Do an availability test for either variant before doing actual test. + */ + if (btf__find_by_name(btf, "unsigned __int128") > 0) { + TEST_BTF_DUMP_DATA(btf, d, NULL, str, unsigned __int128, BTF_F_COMPACT, + "(unsigned __int128)0xffffffffffffffff", + 0xffffffffffffffff); + ASSERT_OK(btf_dump_data(btf, d, "unsigned __int128", NULL, 0, &i, 16, str, + "(unsigned __int128)0xfffffffffffffffffffffffffffffffe"), + "dump unsigned __int128"); + } else if (btf__find_by_name(btf, "__int128 unsigned") > 0) { + TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128 unsigned, BTF_F_COMPACT, + "(__int128 unsigned)0xffffffffffffffff", + 0xffffffffffffffff); + ASSERT_OK(btf_dump_data(btf, d, "__int128 unsigned", NULL, 0, &i, 16, str, + "(__int128 unsigned)0xfffffffffffffffffffffffffffffffe"), + "dump unsigned __int128"); + } else { + ASSERT_TRUE(false, "unsigned_int128_not_found"); + } #endif } @@ -763,8 +778,10 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, char *str) { +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); +#endif TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, "static int cpu_profile_flip = (int)2", 2); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c index 8ab5d3e358dd..8afbf3d0b89a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -7,12 +7,12 @@ #include <bpf/btf.h> void test_btf_endian() { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ enum btf_endianness endian = BTF_LITTLE_ENDIAN; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ enum btf_endianness endian = BTF_BIG_ENDIAN; #else -#error "Unrecognized __BYTE_ORDER" +#error "Unrecognized __BYTE_ORDER__" #endif enum btf_endianness swap_endian = 1 - endian; struct btf *btf = NULL, *swap_btf = NULL; @@ -32,7 +32,7 @@ void test_btf_endian() { ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); /* Get raw BTF data in non-native endianness... */ - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -42,9 +42,9 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); - swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -58,7 +58,7 @@ void test_btf_endian() { /* swap it back to native endianness */ btf__set_endianness(swap_btf, endian); - swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -75,7 +75,7 @@ void test_btf_endian() { swap_btf = NULL; btf__set_endianness(btf, swap_endian); - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -85,7 +85,7 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); /* the type should appear as if it was stored in native endianness */ t = btf__type_by_id(swap_btf, var_id); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index ca7c2a91610a..b1ffe61f2aa9 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -72,7 +72,7 @@ void test_btf_split() { d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i <= btf__get_nr_types(btf2); i++) { + for (i = 1; i < btf__type_cnt(btf2); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c new file mode 100644 index 000000000000..91821f42714d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "tag.skel.h" + +void test_btf_tag(void) +{ + struct tag *skel; + + skel = tag__open_and_load(); + if (!ASSERT_OK_PTR(skel, "btf_tag")) + return; + + if (skel->rodata->skip_tests) { + printf("%s:SKIP: btf_tag attribute not supported", __func__); + test__skip(); + } + + tag__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 022c7d89d6f4..b912eeb0b6b4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -4,19 +4,15 @@ #include <bpf/btf.h> #include "btf_helpers.h" -void test_btf_write() { +static void gen_btf(struct btf *btf) +{ const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; const struct btf_enum *v; const struct btf_param *p; - struct btf *btf; int id, err, str_off; - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "new_empty")) - return; - str_off = btf__find_str(btf, "int"); ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); @@ -281,5 +277,159 @@ void test_btf_write() { "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", "raw_dump"); + /* DECL_TAG */ + id = btf__add_decl_tag(btf, "tag1", 16, -1); + ASSERT_EQ(id, 18, "tag_id"); + t = btf__type_by_id(btf, 18); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); + ASSERT_EQ(t->type, 16, "tag_type"); + ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx"); + ASSERT_STREQ(btf_type_raw_dump(btf, 18), + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump"); + + id = btf__add_decl_tag(btf, "tag2", 14, 1); + ASSERT_EQ(id, 19, "tag_id"); + t = btf__type_by_id(btf, 19); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); + ASSERT_EQ(t->type, 14, "tag_type"); + ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx"); + ASSERT_STREQ(btf_type_raw_dump(btf, 19), + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); +} + +static void test_btf_add() +{ + struct btf *btf; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + return; + + gen_btf(btf); + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] CONST '(anon)' type_id=5", + "[4] VOLATILE '(anon)' type_id=3", + "[5] RESTRICT '(anon)' type_id=4", + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[10] FWD 'struct_fwd' fwd_kind=struct", + "[11] FWD 'union_fwd' fwd_kind=union", + "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[13] TYPEDEF 'typedef1' type_id=1", + "[14] FUNC 'func1' type_id=15 linkage=global", + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", + "[16] VAR 'var1' type_id=1, linkage=global-alloc", + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1"); + btf__free(btf); } + +static void test_btf_add_btf() +{ + struct btf *btf1 = NULL, *btf2 = NULL; + int id; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "btf1")) + return; + + btf2 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf2, "btf2")) + goto cleanup; + + gen_btf(btf1); + gen_btf(btf2); + + id = btf__add_btf(btf1, btf2); + if (!ASSERT_EQ(id, 20, "id")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] CONST '(anon)' type_id=5", + "[4] VOLATILE '(anon)' type_id=3", + "[5] RESTRICT '(anon)' type_id=4", + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[10] FWD 'struct_fwd' fwd_kind=struct", + "[11] FWD 'union_fwd' fwd_kind=union", + "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[13] TYPEDEF 'typedef1' type_id=1", + "[14] FUNC 'func1' type_id=15 linkage=global", + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", + "[16] VAR 'var1' type_id=1, linkage=global-alloc", + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", + + /* types appended from the second BTF */ + "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[21] PTR '(anon)' type_id=20", + "[22] CONST '(anon)' type_id=24", + "[23] VOLATILE '(anon)' type_id=22", + "[24] RESTRICT '(anon)' type_id=23", + "[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10", + "[26] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=20 bits_offset=0\n" + "\t'f2' type_id=20 bits_offset=32 bitfield_size=16", + "[27] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=20 bits_offset=0 bitfield_size=16", + "[28] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[29] FWD 'struct_fwd' fwd_kind=struct", + "[30] FWD 'union_fwd' fwd_kind=union", + "[31] ENUM 'enum_fwd' size=4 vlen=0", + "[32] TYPEDEF 'typedef1' type_id=20", + "[33] FUNC 'func1' type_id=34 linkage=global", + "[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n" + "\t'p1' type_id=20\n" + "\t'p2' type_id=21", + "[35] VAR 'var1' type_id=20, linkage=global-alloc", + "[36] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=20 offset=4 size=8", + "[37] DECL_TAG 'tag1' type_id=35 component_idx=-1", + "[38] DECL_TAG 'tag2' type_id=33 component_idx=1"); + +cleanup: + btf__free(btf1); + btf__free(btf2); +} + +void test_btf_write() +{ + if (test__start_subtest("btf_add")) + test_btf_add(); + if (test__start_subtest("btf_add_btf")) + test_btf_add_btf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 876be0ecb654..621c57222191 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -363,7 +363,7 @@ close_bpf_object: cg_storage_multi_shared__destroy(obj); } -void test_cg_storage_multi(void) +void serial_test_cg_storage_multi(void) { int parent_cgroup_fd = -1, child_cgroup_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 70e94e783070..5de485c7370f 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -21,7 +21,7 @@ static int prog_load(void) bpf_log_buf, BPF_LOG_BUF_SIZE); } -void test_cgroup_attach_autodetach(void) +void serial_test_cgroup_attach_autodetach(void) { __u32 duration = 0, prog_cnt = 4, attach_flags; int allow_prog[2] = {-1}; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 20bb8831dda6..731bea84d8ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val) return ret; } -void test_cgroup_attach_multi(void) +void serial_test_cgroup_attach_multi(void) { __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 9e96f8d87fea..10d3c33821a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -23,7 +23,7 @@ static int prog_load(int verdict) bpf_log_buf, BPF_LOG_BUF_SIZE); } -void test_cgroup_attach_override(void) +void serial_test_cgroup_attach_override(void) { int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1; __u32 duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 9091524131d6..9e6e6aad347c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -24,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls) return 0; } -void test_cgroup_link(void) +void serial_test_cgroup_link(void) { struct { const char *path; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index ab3b9bc5e6d1..9026b42914d3 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -46,7 +46,7 @@ void test_cgroup_v1v2(void) { struct network_helper_opts opts = {}; int server_fd, client_fd, cgroup_fd; - static const int port = 60123; + static const int port = 60120; /* Step 1: Check base connectivity works without any BPF. */ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 012068f33a0a..f73e6e36b74d 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -195,7 +195,7 @@ cleanup: test_check_mtu__destroy(skel); } -void test_check_mtu(void) +void serial_test_check_mtu(void) { __u32 mtu_lo; diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 3d4b2a358d47..1dfe14ff6aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -112,7 +112,7 @@ void test_core_autosize(void) if (!ASSERT_OK_PTR(f, "btf_fdopen")) goto cleanup; - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data")) goto cleanup; written = fwrite(raw_data, 1, raw_sz, f); @@ -163,7 +163,7 @@ void test_core_autosize(void) usleep(1); - bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + bss_map = bpf_object__find_map_by_name(skel->obj, ".bss"); if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 4739b15b2a97..55ec85ba7375 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -30,7 +30,7 @@ static int duration = 0; .output_len = sizeof(struct core_reloc_module_output), \ .prog_sec_name = sec_name, \ .raw_tp_name = tp_name, \ - .trigger = trigger_module_test_read, \ + .trigger = __trigger_module_test_read, \ .needs_testmod = true, \ } @@ -249,8 +249,7 @@ static int duration = 0; #define SIZE_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_size.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .relaxed_core_relocs = true + .btf_src_file = "btf__core_reloc_" #name ".o" #define SIZE_OUTPUT_DATA(type) \ STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ @@ -382,7 +381,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) exp->local_anon_void_ptr = -1; exp->local_anon_arr = -1; - for (i = 1; i <= btf__get_nr_types(local_btf); i++) + for (i = 1; i < btf__type_cnt(local_btf); i++) { t = btf__type_by_id(local_btf, i); /* we are interested only in anonymous types */ @@ -475,19 +474,11 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test) return 0; } -static int trigger_module_test_read(const struct core_reloc_test_case *test) +static int __trigger_module_test_read(const struct core_reloc_test_case *test) { struct core_reloc_module_output *exp = (void *)test->output; - int fd, err; - - fd = open("/sys/kernel/bpf_testmod", O_RDONLY); - err = -errno; - if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) - return err; - - read(fd, NULL, exp->len); /* request expected number of bytes */ - close(fd); + trigger_module_test_read(exp->len); return 0; } @@ -876,7 +867,7 @@ void test_core_reloc(void) goto cleanup; } - data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); + data_map = bpf_object__find_map_by_name(obj, ".bss"); if (CHECK(!data_map, "find_data_map", "data map not found\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c new file mode 100644 index 000000000000..cbaa44ffb8c6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <test_progs.h> +#include "dummy_st_ops.skel.h" + +/* Need to keep consistent with definition in include/linux/bpf.h */ +struct bpf_dummy_ops_state { + int val; +}; + +static void test_dummy_st_ops_attach(void) +{ + struct dummy_st_ops *skel; + struct bpf_link *link; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.dummy_1); + ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_init_ret_value(void) +{ + __u64 args[1] = {0}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_1); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_init_ptr_arg(void) +{ + int exp_retval = 0xbeef; + struct bpf_dummy_ops_state in_state = { + .val = exp_retval, + }; + __u64 args[1] = {(unsigned long)&in_state}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_1); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); + ASSERT_EQ(attr.retval, exp_retval, "test_ret"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_multiple_args(void) +{ + __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + size_t i; + char name[8]; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_2); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + for (i = 0; i < ARRAY_SIZE(args); i++) { + snprintf(name, sizeof(name), "arg %zu", i); + ASSERT_EQ(skel->bss->test_2_args[i], args[i], name); + } + + dummy_st_ops__destroy(skel); +} + +void test_dummy_st_ops(void) +{ + if (test__start_subtest("dummy_st_ops_attach")) + test_dummy_st_ops_attach(); + if (test__start_subtest("dummy_init_ret_value")) + test_dummy_init_ret_value(); + if (test__start_subtest("dummy_init_ptr_arg")) + test_dummy_init_ptr_arg(); + if (test__start_subtest("dummy_multiple_args")) + test_dummy_multiple_args(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 91154c2ba256..4374ac8a8a91 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -6,23 +6,23 @@ void test_fentry_fexit(void) { - struct fentry_test *fentry_skel = NULL; - struct fexit_test *fexit_skel = NULL; + struct fentry_test_lskel *fentry_skel = NULL; + struct fexit_test_lskel *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; int err, prog_fd, i; - fentry_skel = fentry_test__open_and_load(); + fentry_skel = fentry_test_lskel__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; - fexit_skel = fexit_test__open_and_load(); + fexit_skel = fexit_test_lskel__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto close_prog; - err = fentry_test__attach(fentry_skel); + err = fentry_test_lskel__attach(fentry_skel); if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto close_prog; - err = fexit_test__attach(fexit_skel); + err = fexit_test_lskel__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; @@ -44,6 +44,6 @@ void test_fentry_fexit(void) } close_prog: - fentry_test__destroy(fentry_skel); - fexit_test__destroy(fexit_skel); + fentry_test_lskel__destroy(fentry_skel); + fexit_test_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 174c89e7456e..12921b3850d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -3,19 +3,19 @@ #include <test_progs.h> #include "fentry_test.lskel.h" -static int fentry_test(struct fentry_test *fentry_skel) +static int fentry_test(struct fentry_test_lskel *fentry_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fentry_test__attach(fentry_skel); + err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) return err; /* Check that already linked program can't be attached again. */ - link_fd = fentry_test__test1__attach(fentry_skel); + link_fd = fentry_test_lskel__test1__attach(fentry_skel); if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fentry_test(struct fentry_test *fentry_skel) return -1; } - fentry_test__detach(fentry_skel); + fentry_test_lskel__detach(fentry_skel); /* zero results for re-attach test */ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss)); @@ -40,10 +40,10 @@ static int fentry_test(struct fentry_test *fentry_skel) void test_fentry_test(void) { - struct fentry_test *fentry_skel = NULL; + struct fentry_test_lskel *fentry_skel = NULL; int err; - fentry_skel = fentry_test__open_and_load(); + fentry_skel = fentry_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fentry_test(void) ASSERT_OK(err, "fentry_second_attach"); cleanup: - fentry_test__destroy(fentry_skel); + fentry_test_lskel__destroy(fentry_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 73b4c76e6b86..9cff14a23bb7 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -60,7 +60,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, struct bpf_object *obj = NULL, *tgt_obj; __u32 retval, tgt_prog_id, info_len; struct bpf_prog_info prog_info = {}; - struct bpf_program **prog = NULL; + struct bpf_program **prog = NULL, *p; struct bpf_link **link = NULL; int err, tgt_fd, i; struct btf *btf; @@ -69,9 +69,6 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, &tgt_obj, &tgt_fd); if (!ASSERT_OK(err, "tgt_prog_load")) return; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .attach_prog_fd = tgt_fd, - ); info_len = sizeof(prog_info); err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len); @@ -89,10 +86,15 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, if (!ASSERT_OK_PTR(prog, "prog_ptr")) goto close_prog; - obj = bpf_object__open_file(obj_file, &opts); + obj = bpf_object__open_file(obj_file, NULL); if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; + bpf_object__for_each_program(p, obj) { + err = bpf_program__set_attach_target(p, tgt_fd, NULL); + ASSERT_OK(err, "set_attach_target"); + } + err = bpf_object__load(obj); if (!ASSERT_OK(err, "obj_load")) goto close_prog; @@ -270,7 +272,7 @@ static void test_fmod_ret_freplace(void) struct bpf_link *freplace_link = NULL; struct bpf_program *prog; __u32 duration = 0; - int err, pkt_fd; + int err, pkt_fd, attach_prog_fd; err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); @@ -278,26 +280,32 @@ static void test_fmod_ret_freplace(void) if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", tgt_name, err, errno)) return; - opts.attach_prog_fd = pkt_fd; - freplace_obj = bpf_object__open_file(freplace_name, &opts); + freplace_obj = bpf_object__open_file(freplace_name, NULL); if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open")) goto out; + prog = bpf_object__next_program(freplace_obj, NULL); + err = bpf_program__set_attach_target(prog, pkt_fd, NULL); + ASSERT_OK(err, "freplace__set_attach_target"); + err = bpf_object__load(freplace_obj); if (CHECK(err, "freplace_obj_load", "err %d\n", err)) goto out; - prog = bpf_program__next(NULL, freplace_obj); freplace_link = bpf_program__attach_trace(prog); if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace")) goto out; - opts.attach_prog_fd = bpf_program__fd(prog); - fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); + fmod_obj = bpf_object__open_file(fmod_ret_name, NULL); if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open")) goto out; + attach_prog_fd = bpf_program__fd(prog); + prog = bpf_object__next_program(fmod_obj, NULL); + err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL); + ASSERT_OK(err, "fmod_ret_set_attach_target"); + err = bpf_object__load(fmod_obj); if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n")) goto out; @@ -322,14 +330,14 @@ static void test_func_sockmap_update(void) } static void test_obj_load_failure_common(const char *obj_file, - const char *target_obj_file) - + const char *target_obj_file) { /* * standalone test that asserts failure to load freplace prog * because of invalid return code. */ struct bpf_object *obj = NULL, *pkt_obj; + struct bpf_program *prog; int err, pkt_fd; __u32 duration = 0; @@ -339,14 +347,15 @@ static void test_obj_load_failure_common(const char *obj_file, if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", target_obj_file, err, errno)) return; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .attach_prog_fd = pkt_fd, - ); - obj = bpf_object__open_file(obj_file, &opts); + obj = bpf_object__open_file(obj_file, NULL); if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; + prog = bpf_object__next_program(obj, NULL); + err = bpf_program__set_attach_target(prog, pkt_fd, NULL); + ASSERT_OK(err, "set_attach_target"); + /* It should fail to load the program */ err = bpf_object__load(obj); if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) @@ -371,7 +380,8 @@ static void test_func_map_prog_compatibility(void) "./test_attach_probe.o"); } -void test_fexit_bpf2bpf(void) +/* NOTE: affect other tests, must run in serial mode */ +void serial_test_fexit_bpf2bpf(void) { if (test__start_subtest("target_no_callees")) test_target_no_callees(); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index 4e7f4b42ea29..f949647dbbc2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -10,7 +10,7 @@ static int do_sleep(void *skel) { - struct fexit_sleep *fexit_skel = skel; + struct fexit_sleep_lskel *fexit_skel = skel; struct timespec ts1 = { .tv_nsec = 1 }; struct timespec ts2 = { .tv_sec = 10 }; @@ -25,16 +25,16 @@ static char child_stack[STACK_SIZE]; void test_fexit_sleep(void) { - struct fexit_sleep *fexit_skel = NULL; + struct fexit_sleep_lskel *fexit_skel = NULL; int wstatus, duration = 0; pid_t cpid; int err, fexit_cnt; - fexit_skel = fexit_sleep__open_and_load(); + fexit_skel = fexit_sleep_lskel__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto cleanup; - err = fexit_sleep__attach(fexit_skel); + err = fexit_sleep_lskel__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto cleanup; @@ -60,7 +60,7 @@ void test_fexit_sleep(void) */ close(fexit_skel->progs.nanosleep_fentry.prog_fd); close(fexit_skel->progs.nanosleep_fexit.prog_fd); - fexit_sleep__detach(fexit_skel); + fexit_sleep_lskel__detach(fexit_skel); /* kill the thread to unwind sys_nanosleep stack through the trampoline */ kill(cpid, 9); @@ -78,5 +78,5 @@ void test_fexit_sleep(void) goto cleanup; cleanup: - fexit_sleep__destroy(fexit_skel); + fexit_sleep_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index af3dba726701..d4887d8bb396 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -3,19 +3,19 @@ #include <test_progs.h> #include "fexit_test.lskel.h" -static int fexit_test(struct fexit_test *fexit_skel) +static int fexit_test(struct fexit_test_lskel *fexit_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fexit_test__attach(fexit_skel); + err = fexit_test_lskel__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) return err; /* Check that already linked program can't be attached again. */ - link_fd = fexit_test__test1__attach(fexit_skel); + link_fd = fexit_test_lskel__test1__attach(fexit_skel); if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fexit_test(struct fexit_test *fexit_skel) return -1; } - fexit_test__detach(fexit_skel); + fexit_test_lskel__detach(fexit_skel); /* zero results for re-attach test */ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss)); @@ -40,10 +40,10 @@ static int fexit_test(struct fexit_test *fexit_skel) void test_fexit_test(void) { - struct fexit_test *fexit_skel = NULL; + struct fexit_test_lskel *fexit_skel = NULL; int err; - fexit_skel = fexit_test__open_and_load(); + fexit_skel = fexit_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fexit_test(void) ASSERT_OK(err, "fexit_second_attach"); cleanup: - fexit_test__destroy(fexit_skel); + fexit_test_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 225714f71ac6..ac54e3f91d42 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -458,9 +458,9 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) return -1; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i); + snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (!prog) return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 0e8a4d2f023d..6093728497c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include <network_helpers.h> -void test_flow_dissector_load_bytes(void) +void serial_test_flow_dissector_load_bytes(void) { struct bpf_flow_keys flow_keys; __u32 duration = 0, retval, size; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 3931ede5c534..f0c6c226aba8 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -628,7 +628,7 @@ out_close: } } -void test_flow_dissector_reattach(void) +void serial_test_flow_dissector_reattach(void) { int err, new_net, saved_net; diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c new file mode 100644 index 000000000000..3948da12a528 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "get_branch_snapshot.skel.h" + +static int *pfd_array; +static int cpu_cnt; + +static bool is_hypervisor(void) +{ + char *line = NULL; + bool ret = false; + size_t len; + FILE *fp; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + return false; + + while (getline(&line, &len, fp) != -1) { + if (!strncmp(line, "flags", 5)) { + if (strstr(line, "hypervisor") != NULL) + ret = true; + break; + } + } + + free(line); + fclose(fp); + return ret; +} + +static int create_perf_events(void) +{ + struct perf_event_attr attr = {0}; + int cpu; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_RAW; + attr.config = 0x1b00; + attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL | + PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY; + + cpu_cnt = libbpf_num_possible_cpus(); + pfd_array = malloc(sizeof(int) * cpu_cnt); + if (!pfd_array) { + cpu_cnt = 0; + return 1; + } + + for (cpu = 0; cpu < cpu_cnt; cpu++) { + pfd_array[cpu] = syscall(__NR_perf_event_open, &attr, + -1, cpu, -1, PERF_FLAG_FD_CLOEXEC); + if (pfd_array[cpu] < 0) + break; + } + + return cpu == 0; +} + +static void close_perf_events(void) +{ + int cpu, fd; + + for (cpu = 0; cpu < cpu_cnt; cpu++) { + fd = pfd_array[cpu]; + if (fd < 0) + break; + close(fd); + } + free(pfd_array); +} + +void serial_test_get_branch_snapshot(void) +{ + struct get_branch_snapshot *skel = NULL; + int err; + + /* Skip the test before we fix LBR snapshot for hypervisor. */ + if (is_hypervisor()) { + test__skip(); + return; + } + + if (create_perf_events()) { + test__skip(); /* system doesn't support LBR */ + goto cleanup; + } + + skel = get_branch_snapshot__open_and_load(); + if (!ASSERT_OK_PTR(skel, "get_branch_snapshot__open_and_load")) + goto cleanup; + + err = kallsyms_find("bpf_testmod_loop_test", &skel->bss->address_low); + if (!ASSERT_OK(err, "kallsyms_find")) + goto cleanup; + + /* Just a guess for the end of this function, as module functions + * in /proc/kallsyms could come in any order. + */ + skel->bss->address_high = skel->bss->address_low + 128; + + err = get_branch_snapshot__attach(skel); + if (!ASSERT_OK(err, "get_branch_snapshot__attach")) + goto cleanup; + + trigger_module_test_read(100); + + if (skel->bss->total_entries < 16) { + /* too few entries for the hit/waste test */ + test__skip(); + goto cleanup; + } + + ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr"); + + /* Given we stop LBR in software, we will waste a few entries. + * But we should try to waste as few as possible entries. We are at + * about 7 on x86_64 systems. + * Add a check for < 10 so that we get heads-up when something + * changes and wastes too many entries. + */ + ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries"); + +cleanup: + get_branch_snapshot__destroy(skel); + close_perf_events(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 9efa7e50eab2..afd8639f9a94 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -103,11 +103,18 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration) static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) { int err = -ENOMEM, map_fd, zero = 0; - struct bpf_map *map; + struct bpf_map *map, *map2; __u8 *buff; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); - if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) + if (!ASSERT_OK_PTR(map, "map")) + return; + if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal")) + return; + + /* ensure we can lookup internal maps by their ELF names */ + map2 = bpf_object__find_map_by_name(obj, ".rodata"); + if (!ASSERT_EQ(map, map2, "same_maps")) return; map_fd = bpf_map__fd(map); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index ee46b11f1f9a..1db86eab101b 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -16,7 +16,7 @@ void test_global_data_init(void) if (CHECK_FAIL(err)) return; - map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); + map = bpf_object__find_map_by_name(obj, ".rodata"); if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index ddfb6bf97152..01e51d16c8b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -48,7 +48,8 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) *(bool *)ctx = true; } -void test_kfree_skb(void) +/* TODO: fix kernel panic caused by this test in parallel mode */ +void serial_test_kfree_skb(void) { struct __sk_buff skb = {}; struct bpf_prog_test_run_attr tattr = { @@ -92,7 +93,7 @@ void test_kfree_skb(void) if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) goto close_prog; - global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); + global_data = bpf_object__find_map_by_name(obj2, ".bss"); if (CHECK(!global_data, "find global data", "not found\n")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 9611f2bc50df..5c9c0176991b 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -7,10 +7,10 @@ static void test_main(void) { - struct kfunc_call_test *skel; + struct kfunc_call_test_lskel *skel; int prog_fd, retval, err; - skel = kfunc_call_test__open_and_load(); + skel = kfunc_call_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; @@ -26,7 +26,7 @@ static void test_main(void) ASSERT_OK(err, "bpf_prog_test_run(test2)"); ASSERT_EQ(retval, 3, "test2-retval"); - kfunc_call_test__destroy(skel); + kfunc_call_test_lskel__destroy(skel); } static void test_subprog(void) diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index cf3acfa5a91d..79f6bd1e50d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -7,6 +7,7 @@ #include "test_ksyms_btf.skel.h" #include "test_ksyms_btf_null_check.skel.h" #include "test_ksyms_weak.skel.h" +#include "test_ksyms_weak.lskel.h" static int duration; @@ -89,11 +90,11 @@ static void test_weak_syms(void) int err; skel = test_ksyms_weak__open_and_load(); - if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load")) return; err = test_ksyms_weak__attach(skel); - if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_ksyms_weak__attach")) goto cleanup; /* trigger tracepoint */ @@ -109,6 +110,33 @@ cleanup: test_ksyms_weak__destroy(skel); } +static void test_weak_syms_lskel(void) +{ + struct test_ksyms_weak_lskel *skel; + struct test_ksyms_weak_lskel__data *data; + int err; + + skel = test_ksyms_weak_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load")) + return; + + err = test_ksyms_weak_lskel__attach(skel); + if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym"); + ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym"); + ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym"); + ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym"); + +cleanup: + test_ksyms_weak_lskel__destroy(skel); +} + void test_ksyms_btf(void) { int percpu_datasec; @@ -136,4 +164,7 @@ void test_ksyms_btf(void) if (test__start_subtest("weak_ksyms")) test_weak_syms(); + + if (test__start_subtest("weak_ksyms_lskel")) + test_weak_syms_lskel(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 2cd5cded543f..d490ad80eccb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -2,30 +2,61 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> -#include <bpf/libbpf.h> -#include <bpf/btf.h> +#include <network_helpers.h> #include "test_ksyms_module.lskel.h" +#include "test_ksyms_module.skel.h" -static int duration; - -void test_ksyms_module(void) +void test_ksyms_module_lskel(void) { - struct test_ksyms_module* skel; + struct test_ksyms_module_lskel *skel; + int retval; int err; - skel = test_ksyms_module__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!env.has_testmod) { + test__skip(); return; + } - err = test_ksyms_module__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + skel = test_ksyms_module_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load")) + return; + err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; + ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); +cleanup: + test_ksyms_module_lskel__destroy(skel); +} - usleep(1); +void test_ksyms_module_libbpf(void) +{ + struct test_ksyms_module *skel; + int retval, err; - ASSERT_EQ(skel->bss->triggered, true, "triggered"); - ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val"); + if (!env.has_testmod) { + test__skip(); + return; + } + skel = test_ksyms_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) + return; + err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, + sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module__destroy(skel); } + +void test_ksyms_module(void) +{ + if (test__start_subtest("lskel")) + test_ksyms_module_lskel(); + if (test__start_subtest("libbpf")) + test_ksyms_module_libbpf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index 59adb4715394..7589c03fd26b 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -541,7 +541,7 @@ close_servers: } } -void test_migrate_reuseport(void) +void serial_test_migrate_reuseport(void) { struct test_migrate_reuseport *skel; int i; diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index 97fec70c600b..b772fe30ce9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -53,7 +53,8 @@ cleanup: modify_return__destroy(skel); } -void test_modify_return(void) +/* TODO: conflict with get_func_ip_test */ +void serial_test_modify_return(void) { run_test(0 /* input_retval */, 1 /* want_side_effect */, diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index d85a69b7ce44..6d0e50dcf47c 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -2,46 +2,33 @@ /* Copyright (c) 2020 Facebook */ #include <test_progs.h> +#include <stdbool.h> #include "test_module_attach.skel.h" static int duration; -static int trigger_module_test_read(int read_sz) +static int trigger_module_test_writable(int *val) { int fd, err; + char buf[65]; + ssize_t rd; - fd = open("/sys/kernel/bpf_testmod", O_RDONLY); + fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); err = -errno; - if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) + if (!ASSERT_GE(fd, 0, "testmode_file_open")) return err; - read(fd, NULL, read_sz); - close(fd); - - return 0; -} - -static int trigger_module_test_write(int write_sz) -{ - int fd, err; - char *buf = malloc(write_sz); - - if (!buf) - return -ENOMEM; - - memset(buf, 'a', write_sz); - buf[write_sz-1] = '\0'; - - fd = open("/sys/kernel/bpf_testmod", O_WRONLY); + rd = read(fd, buf, sizeof(buf) - 1); err = -errno; - if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) { - free(buf); + if (!ASSERT_GT(rd, 0, "testmod_file_rd_val")) { + close(fd); return err; } - write(fd, buf, write_sz); + buf[rd] = '\0'; + *val = strtol(buf, NULL, 0); close(fd); - free(buf); + return 0; } @@ -58,6 +45,7 @@ void test_module_attach(void) struct test_module_attach__bss *bss; struct bpf_link *link; int err; + int writable_val = 0; skel = test_module_attach__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -90,6 +78,14 @@ void test_module_attach(void) ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); + bss->raw_tp_writable_bare_early_ret = true; + bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4; + ASSERT_OK(trigger_module_test_writable(&writable_val), + "trigger_writable"); + ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in"); + ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val, + "writable_test_out"); + test_module_attach__detach(skel); /* attach fentry/fexit and make sure it get's module reference */ diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 2535788e135f..24d493482ffc 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -78,7 +78,8 @@ static void test_ns_current_pid_tgid_new_ns(void) return; } -void test_ns_current_pid_tgid(void) +/* TODO: use a different tracepoint */ +void serial_test_ns_current_pid_tgid(void) { if (test__start_subtest("ns_current_pid_tgid_root_ns")) test_current_pid_tgid(NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 6490e9673002..4e32f3586a75 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -43,9 +43,10 @@ int trigger_on_cpu(int cpu) return 0; } -void test_perf_buffer(void) +void serial_test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i; + int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; + int zero = 0, my_pid = getpid(); struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; cpu_set_t cpu_seen; @@ -71,6 +72,10 @@ void test_perf_buffer(void) if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) goto out_close; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0); + if (!ASSERT_OK(err, "my_pid_update")) + goto out_close; + /* attach probe */ err = test_perf_buffer__attach(skel); if (CHECK(err, "attach_kprobe", "err %d\n", err)) @@ -107,19 +112,19 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; - if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", - "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus)) goto out_close; - for (i = 0; i < nr_cpus; i++) { + for (i = 0, j = 0; i < nr_cpus; i++) { if (i >= on_len || !online[i]) continue; - fd = perf_buffer__buffer_fd(pb, i); + fd = perf_buffer__buffer_fd(pb, j); CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); last_fd = fd; - err = perf_buffer__consume_buffer(pb, i); + err = perf_buffer__consume_buffer(pb, j); if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) goto out_close; @@ -127,12 +132,13 @@ void test_perf_buffer(void) if (trigger_on_cpu(i)) goto out_close; - err = perf_buffer__consume_buffer(pb, i); - if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + err = perf_buffer__consume_buffer(pb, j); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err)) goto out_close; if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) goto out_close; + j++; } out_free_pb: diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index b1abd0c46607..ede07344f264 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -23,7 +23,8 @@ static void burn_cpu(void) ++j; } -void test_perf_link(void) +/* TODO: often fails in concurrent mode */ +void serial_test_perf_link(void) { struct test_perf_link *skel = NULL; struct perf_event_attr attr; diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 95bd12097358..abf890d066eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -void test_probe_user(void) +/* TODO: corrupts other tests uses connect() */ +void serial_test_probe_user(void) { - const char *prog_name = "kprobe/__sys_connect"; + const char *prog_name = "handle_sys_connect"; const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; @@ -18,7 +19,7 @@ void test_probe_user(void) if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_title(obj, prog_name); + kprobe_prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK(!kprobe_prog, "find_probe", "prog '%s' not found\n", prog_name)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c index 5c45424cac5f..ddefa1192e5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -3,7 +3,8 @@ #include <test_progs.h> #include <linux/nbd.h> -void test_raw_tp_writable_test_run(void) +/* NOTE: conflict with other tests. */ +void serial_test_raw_tp_writable_test_run(void) { __u32 duration = 0; char error[4096]; diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index 5f9eaa3ab584..fd5d2ddfb062 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -37,7 +37,7 @@ void test_rdonly_maps(void) if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) goto cleanup; - bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss"); + bss_map = bpf_object__find_map_by_name(obj, ".bss"); if (CHECK(!bss_map, "find_bss_map", "failed\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c index 0e378d63fe18..f3af2627b599 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursion.c +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -20,18 +20,18 @@ void test_recursion(void) goto out; ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key); ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key); ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2"); ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); - err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup), + err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete), &prog_info, &prog_info_len); if (!ASSERT_OK(err, "get_prog_info")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 4e91f4d6466c..873323fb18ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -1,6 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +static void toggle_object_autoload_progs(const struct bpf_object *obj, + const char *name_load) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + const char *name = bpf_program__name(prog); + + if (!strcmp(name_load, name)) + bpf_program__set_autoload(prog, true); + else + bpf_program__set_autoload(prog, false); + } +} + void test_reference_tracking(void) { const char *file = "test_sk_lookup_kern.o"; @@ -9,44 +24,49 @@ void test_reference_tracking(void) .object_name = obj_name, .relaxed_maps = true, ); - struct bpf_object *obj; + struct bpf_object *obj_iter, *obj = NULL; struct bpf_program *prog; __u32 duration = 0; int err = 0; - obj = bpf_object__open_file(file, &open_opts); - if (!ASSERT_OK_PTR(obj, "obj_open_file")) + obj_iter = bpf_object__open_file(file, &open_opts); + if (!ASSERT_OK_PTR(obj_iter, "obj_iter_open_file")) return; - if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", + if (CHECK(strcmp(bpf_object__name(obj_iter), obj_name), "obj_name", "wrong obj name '%s', expected '%s'\n", - bpf_object__name(obj), obj_name)) + bpf_object__name(obj_iter), obj_name)) goto cleanup; - bpf_object__for_each_program(prog, obj) { - const char *title; + bpf_object__for_each_program(prog, obj_iter) { + const char *name; - /* Ignore .text sections */ - title = bpf_program__section_name(prog); - if (strstr(title, ".text") != NULL) + name = bpf_program__name(prog); + if (!test__start_subtest(name)) continue; - if (!test__start_subtest(title)) - continue; + obj = bpf_object__open_file(file, &open_opts); + if (!ASSERT_OK_PTR(obj, "obj_open_file")) + goto cleanup; + toggle_object_autoload_progs(obj, name); /* Expect verifier failure if test name has 'err' */ - if (strstr(title, "err_") != NULL) { + if (strncmp(name, "err_", sizeof("err_") - 1) == 0) { libbpf_print_fn_t old_print_fn; old_print_fn = libbpf_set_print(NULL); - err = !bpf_program__load(prog, "GPL", 0); + err = !bpf_object__load(obj); libbpf_set_print(old_print_fn); } else { - err = bpf_program__load(prog, "GPL", 0); + err = bpf_object__load(obj); } - CHECK(err, title, "\n"); + ASSERT_OK(err, name); + + bpf_object__close(obj); + obj = NULL; } cleanup: bpf_object__close(obj); + bpf_object__close(obj_iter); } diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index f62361306f6d..f4a13d9dd5c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -106,9 +106,9 @@ static int resolve_symbols(void) "Failed to load BTF from btf_data.o\n")) return -1; - nr = btf__get_nr_types(btf); + nr = btf__type_cnt(btf); - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id < nr; type_id++) { if (__resolve_symbol(btf, type_id)) break; } @@ -117,14 +117,14 @@ static int resolve_symbols(void) return 0; } -int test_resolve_btfids(void) +void test_resolve_btfids(void) { __u32 *test_list, *test_lists[] = { test_list_local, test_list_global }; unsigned int i, j; int ret = 0; if (resolve_symbols()) - return -1; + return; /* Check BTF_ID_LIST(test_list_local) and * BTF_ID_LIST_GLOBAL(test_list_global) IDs @@ -138,7 +138,7 @@ int test_resolve_btfids(void) test_symbols[i].name, test_list[i], test_symbols[i].id); if (ret) - return ret; + return; } } @@ -161,9 +161,7 @@ int test_resolve_btfids(void) if (i > 0) { if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check")) - return -1; + return; } } - - return ret; } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4706cee84360..9a80fe8a6427 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -58,7 +58,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } -static struct test_ringbuf *skel; +static struct test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; static void trigger_samples() @@ -90,13 +90,13 @@ void test_ringbuf(void) int page_size = getpagesize(); void *mmap_ptr, *tmp_ptr; - skel = test_ringbuf__open(); + skel = test_ringbuf_lskel__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; skel->maps.ringbuf.max_entries = page_size; - err = test_ringbuf__load(skel); + err = test_ringbuf_lskel__load(skel); if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; @@ -154,7 +154,7 @@ void test_ringbuf(void) if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; - err = test_ringbuf__attach(skel); + err = test_ringbuf_lskel__attach(skel); if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) goto cleanup; @@ -292,8 +292,8 @@ void test_ringbuf(void) CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", 1L, skel->bss->discarded); - test_ringbuf__detach(skel); + test_ringbuf_lskel__detach(skel); cleanup: ring_buffer__free(ringbuf); - test_ringbuf__destroy(skel); + test_ringbuf_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 4efd337d6a3c..3cfc910ab3c1 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -114,7 +114,7 @@ static int prepare_bpf_obj(void) err = bpf_object__load(obj); RET_ERR(err, "load bpf_object", "err:%d\n", err); - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); RET_ERR(select_by_skb_data_prog < 0, "get prog fd", @@ -858,7 +858,7 @@ out: cleanup(); } -void test_select_reuseport(void) +void serial_test_select_reuseport(void) { saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); if (saved_tcp_fo < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c index 189a34a7addb..15dacfcfaa6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c @@ -25,7 +25,8 @@ static void *worker(void *p) return NULL; } -void test_send_signal_sched_switch(void) +/* NOTE: cause events loss */ +void serial_test_send_signal_sched_switch(void) { struct test_send_signal_kern *skel; pthread_t threads[THREAD_COUNT]; diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index dfcbddcbe4d3..fdfdcff6cbef 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -42,7 +42,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) signal(SIGALRM, SIG_DFL); } -void test_signal_pending(enum bpf_prog_type prog_type) +void test_signal_pending(void) { test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER); test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 3a469099f30d..1d272e05188e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -48,7 +48,7 @@ configure_stack(void) return false; sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", "direct-action object-file ./test_sk_assign.o", - "section classifier/sk_assign_test", + "section tc", (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose"); if (CHECK(system(tc_cmd), "BPF load failed;", "run with -vv for more info\n")) diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index aee41547e7f4..6db07401bc49 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -598,7 +598,7 @@ close: static void run_lookup_prog(const struct test *t) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 }; int client_fd, reuse_conn_fd = -1; struct bpf_link *lookup_link; int i, err; @@ -1053,7 +1053,7 @@ static void run_sk_assign(struct test_sk_lookup *skel, struct bpf_program *lookup_prog, const char *remote_ip, const char *local_ip) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 }; struct bpf_sk_lookup ctx; __u64 server_cookie; int i, err; diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c index 2b392590e8ca..547ae53cde74 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c @@ -105,7 +105,7 @@ out: close(listen_fd); } -void test_sk_storage_tracing(void) +void serial_test_sk_storage_tracing(void) { struct test_sk_storage_trace_itself *skel_itself; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index fafeddaad6a9..c437e6ba8fe2 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -11,12 +11,14 @@ void test_skb_ctx(void) .cb[3] = 4, .cb[4] = 5, .priority = 6, + .ingress_ifindex = 11, .ifindex = 1, .tstamp = 7, .wire_len = 100, .gso_segs = 8, .mark = 9, .gso_size = 10, + .hwtstamp = 11, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, @@ -97,6 +99,10 @@ void test_skb_ctx(void) "ctx_out_ifindex", "skb->ifindex == %d, expected %d\n", skb.ifindex, 1); + CHECK_ATTR(skb.ingress_ifindex != 11, + "ctx_out_ingress_ifindex", + "skb->ingress_ifindex == %d, expected %d\n", + skb.ingress_ifindex, 11); CHECK_ATTR(skb.tstamp != 8, "ctx_out_tstamp", "skb->tstamp == %lld, expected %d\n", diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c new file mode 100644 index 000000000000..3eefdfed1db9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include <test_progs.h> +#include <sys/un.h> +#include "test_skc_to_unix_sock.skel.h" + +static const char *sock_path = "@skc_to_unix_sock"; + +void test_skc_to_unix_sock(void) +{ + struct test_skc_to_unix_sock *skel; + struct sockaddr_un sockaddr; + int err, sockfd = 0; + + skel = test_skc_to_unix_sock__open(); + if (!ASSERT_OK_PTR(skel, "could not open BPF object")) + return; + + skel->rodata->my_pid = getpid(); + + err = test_skc_to_unix_sock__load(skel); + if (!ASSERT_OK(err, "could not load BPF object")) + goto cleanup; + + err = test_skc_to_unix_sock__attach(skel); + if (!ASSERT_OK(err, "could not attach BPF object")) + goto cleanup; + + /* trigger unix_listen */ + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (!ASSERT_GT(sockfd, 0, "socket failed")) + goto cleanup; + + memset(&sockaddr, 0, sizeof(sockaddr)); + sockaddr.sun_family = AF_UNIX; + strncpy(sockaddr.sun_path, sock_path, strlen(sock_path)); + sockaddr.sun_path[0] = '\0'; + + err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)); + if (!ASSERT_OK(err, "bind failed")) + goto cleanup; + + err = listen(sockfd, 1); + if (!ASSERT_OK(err, "listen failed")) + goto cleanup; + + ASSERT_EQ(strcmp(skel->bss->path, sock_path), 0, "bpf_skc_to_unix_sock failed"); + +cleanup: + if (sockfd) + close(sockfd); + test_skc_to_unix_sock__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index f6f130c99b8c..180afd632f4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -16,8 +16,13 @@ void test_skeleton(void) struct test_skeleton* skel; struct test_skeleton__bss *bss; struct test_skeleton__data *data; + struct test_skeleton__data_dyn *data_dyn; struct test_skeleton__rodata *rodata; + struct test_skeleton__rodata_dyn *rodata_dyn; struct test_skeleton__kconfig *kcfg; + const void *elf_bytes; + size_t elf_bytes_sz = 0; + int i; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -28,7 +33,12 @@ void test_skeleton(void) bss = skel->bss; data = skel->data; + data_dyn = skel->data_dyn; rodata = skel->rodata; + rodata_dyn = skel->rodata_dyn; + + ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name"); + ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name"); /* validate values are pre-initialized correctly */ CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1); @@ -44,6 +54,12 @@ void test_skeleton(void) CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0); CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0); + ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz"); + for (i = 0; i < 4; i++) + ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr"); + for (i = 0; i < 4; i++) + ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr"); + /* validate we can pre-setup global variables, even in .bss */ data->in1 = 10; data->in2 = 11; @@ -51,6 +67,10 @@ void test_skeleton(void) bss->in4 = 13; rodata->in.in6 = 14; + rodata_dyn->in_dynarr_sz = 4; + for (i = 0; i < 4; i++) + rodata_dyn->in_dynarr[i] = i + 10; + err = test_skeleton__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; @@ -62,6 +82,10 @@ void test_skeleton(void) CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL); CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14); + ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz"); + for (i = 0; i < 4; i++) + ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr"); + /* now set new values and attach to get them into outX variables */ data->in1 = 1; data->in2 = 2; @@ -71,6 +95,8 @@ void test_skeleton(void) bss->in5.b = 6; kcfg = skel->kconfig; + skel->data_read_mostly->read_mostly_var = 123; + err = test_skeleton__attach(skel); if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; @@ -91,6 +117,15 @@ void test_skeleton(void) CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2", "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION); + for (i = 0; i < 4; i++) + ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr"); + + ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); + ASSERT_OK_PTR(elf_bytes, "elf_bytes"); + ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); + cleanup: test_skeleton__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 8fd1b4b29a0e..394ebfc3bbf3 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -33,7 +33,7 @@ #define EXP_NO_BUF_RET 29 -void test_snprintf_positive(void) +static void test_snprintf_positive(void) { char exp_addr_out[] = EXP_ADDR_OUT; char exp_sym_out[] = EXP_SYM_OUT; @@ -103,7 +103,7 @@ static int load_single_snprintf(char *fmt) return ret; } -void test_snprintf_negative(void) +static void test_snprintf_negative(void) { ASSERT_OK(load_single_snprintf("valid %d"), "valid usage"); diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c index 76e1f5fe18fa..dd41b826be30 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -6,7 +6,7 @@ /* Demonstrate that bpf_snprintf_btf succeeds and that various data types * are formatted correctly. */ -void test_snprintf_btf(void) +void serial_test_snprintf_btf(void) { struct netif_receive_skb *skel; struct netif_receive_skb__bss *bss; diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 577d619fb07e..fae40db4d81f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -329,7 +329,7 @@ done: close(listen_fd); } -void test_sock_fields(void) +void serial_test_sock_fields(void) { struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 5c5979046523..2a9cb951bfd6 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -949,7 +949,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int err, n; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1002,17 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, goto close_peer1; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_peer1: xclose(p1); @@ -1571,7 +1564,6 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1606,17 +1598,11 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); @@ -1748,7 +1734,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; u32 key; char b; @@ -1781,17 +1766,11 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1841,7 +1820,6 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1876,17 +1854,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1932,7 +1904,6 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1963,17 +1934,11 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); @@ -2037,7 +2002,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_udp_unix_redir(skel, map, family); } -void test_sockmap_listen(void) +void serial_test_sockmap_listen(void) { struct test_sockmap_listen *skel; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 51fac975b316..bc34f7773444 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include "cgroup_helpers.h" -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) { enum bpf_attach_type attach_type; enum bpf_prog_type prog_type; @@ -15,23 +15,23 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) return -1; } - prog = bpf_object__find_program_by_title(obj, title); + prog = bpf_object__find_program_by_name(obj, name); if (!prog) { - log_err("Failed to find %s BPF program", title); + log_err("Failed to find %s BPF program", name); return -1; } err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, attach_type, BPF_F_ALLOW_MULTI); if (err) { - log_err("Failed to attach %s BPF program", title); + log_err("Failed to attach %s BPF program", name); return -1; } return 0; } -static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title) +static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) { enum bpf_attach_type attach_type; enum bpf_prog_type prog_type; @@ -42,7 +42,7 @@ static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title) if (err) return -1; - prog = bpf_object__find_program_by_title(obj, title); + prog = bpf_object__find_program_by_name(obj, name); if (!prog) return -1; @@ -89,7 +89,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - child: 0x80 -> 0x90 */ - err = prog_attach(obj, cg_child, "cgroup/getsockopt/child"); + err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); if (err) goto detach; @@ -113,7 +113,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: 0x90 -> 0xA0 */ - err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent"); + err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); if (err) goto detach; @@ -157,7 +157,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: unexpected 0x40, EPERM */ - err = prog_detach(obj, cg_child, "cgroup/getsockopt/child"); + err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); if (err) { log_err("Failed to detach child program"); goto detach; @@ -198,8 +198,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/getsockopt/child"); - prog_detach(obj, cg_parent, "cgroup/getsockopt/parent"); + prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); + prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); return err; } @@ -236,7 +236,7 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach child program and make sure it adds 0x10. */ - err = prog_attach(obj, cg_child, "cgroup/setsockopt"); + err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); if (err) goto detach; @@ -263,7 +263,7 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach parent program and make sure it adds another 0x10. */ - err = prog_attach(obj, cg_parent, "cgroup/setsockopt"); + err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); if (err) goto detach; @@ -289,8 +289,8 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/setsockopt"); - prog_detach(obj, cg_parent, "cgroup/setsockopt"); + prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); + prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index b5940e6ca67c..9825f1f7bfcc 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -21,7 +21,7 @@ static void test_tailcall_1(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -38,9 +38,9 @@ static void test_tailcall_1(void) goto out; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -70,9 +70,9 @@ static void test_tailcall_1(void) err, errno, retval); for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -92,9 +92,9 @@ static void test_tailcall_1(void) for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { j = bpf_map__def(prog_array)->max_entries - 1 - i; - snprintf(prog_name, sizeof(prog_name), "classifier/%i", j); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", j); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -159,7 +159,7 @@ static void test_tailcall_2(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -176,9 +176,9 @@ static void test_tailcall_2(void) goto out; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -219,10 +219,7 @@ out: bpf_object__close(obj); } -/* test_tailcall_3 checks that the count value of the tail call limit - * enforcement matches with expectations. - */ -static void test_tailcall_3(void) +static void test_tailcall_count(const char *which) { int err, map_fd, prog_fd, main_fd, data_fd, i, val; struct bpf_map *prog_array, *data_map; @@ -231,12 +228,12 @@ static void test_tailcall_3(void) __u32 retval, duration; char buff[128] = {}; - err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -252,7 +249,7 @@ static void test_tailcall_3(void) if (CHECK_FAIL(map_fd < 0)) goto out; - prog = bpf_object__find_program_by_title(obj, "classifier/0"); + prog = bpf_object__find_program_by_name(obj, "classifier_0"); if (CHECK_FAIL(!prog)) goto out; @@ -296,6 +293,22 @@ out: bpf_object__close(obj); } +/* test_tailcall_3 checks that the count value of the tail call limit + * enforcement matches with expectations. JIT uses direct jump. + */ +static void test_tailcall_3(void) +{ + test_tailcall_count("tailcall3.o"); +} + +/* test_tailcall_6 checks that the count value of the tail call limit + * enforcement matches with expectations. JIT uses indirect jump. + */ +static void test_tailcall_6(void) +{ + test_tailcall_count("tailcall6.o"); +} + /* test_tailcall_4 checks that the kernel properly selects indirect jump * for the case where the key is not known. Latter is passed via global * data to select different targets we can compare return value of. @@ -316,7 +329,7 @@ static void test_tailcall_4(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -341,9 +354,9 @@ static void test_tailcall_4(void) return; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -404,7 +417,7 @@ static void test_tailcall_5(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -429,9 +442,9 @@ static void test_tailcall_5(void) return; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -490,7 +503,7 @@ static void test_tailcall_bpf2bpf_1(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -508,9 +521,9 @@ static void test_tailcall_bpf2bpf_1(void) /* nop -> jmp */ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -574,7 +587,7 @@ static void test_tailcall_bpf2bpf_2(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -590,7 +603,7 @@ static void test_tailcall_bpf2bpf_2(void) if (CHECK_FAIL(map_fd < 0)) goto out; - prog = bpf_object__find_program_by_title(obj, "classifier/0"); + prog = bpf_object__find_program_by_name(obj, "classifier_0"); if (CHECK_FAIL(!prog)) goto out; @@ -652,7 +665,7 @@ static void test_tailcall_bpf2bpf_3(void) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -669,9 +682,9 @@ static void test_tailcall_bpf2bpf_3(void) goto out; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -749,7 +762,7 @@ static void test_tailcall_bpf2bpf_4(bool noise) if (CHECK_FAIL(err)) return; - prog = bpf_object__find_program_by_title(obj, "classifier"); + prog = bpf_object__find_program_by_name(obj, "entry"); if (CHECK_FAIL(!prog)) goto out; @@ -766,9 +779,9 @@ static void test_tailcall_bpf2bpf_4(bool noise) goto out; for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { - snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK_FAIL(!prog)) goto out; @@ -822,6 +835,8 @@ void test_tailcalls(void) test_tailcall_4(); if (test__start_subtest("tailcall_5")) test_tailcall_5(); + if (test__start_subtest("tailcall_6")) + test_tailcall_6(); if (test__start_subtest("tailcall_bpf2bpf_1")) test_tailcall_bpf2bpf_1(); if (test__start_subtest("tailcall_bpf2bpf_2")) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index e7201ba29ccd..4b18b73df10b 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -176,6 +176,18 @@ static int netns_setup_namespaces(const char *verb) return 0; } +static void netns_setup_namespaces_nofail(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns); + system(cmd); + ns++; + } +} + struct netns_setup_result { int ifindex_veth_src_fwd; int ifindex_veth_dst_fwd; @@ -633,7 +645,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) struct nstoken *nstoken = NULL; int err; int tunnel_pid = -1; - int src_fd, target_fd; + int src_fd, target_fd = -1; int ifindex; /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. @@ -762,6 +774,8 @@ fail: static void *test_tc_redirect_run_tests(void *arg) { + netns_setup_namespaces_nofail("delete"); + RUN_TEST(tc_redirect_peer); RUN_TEST(tc_redirect_peer_l3); RUN_TEST(tc_redirect_neigh); @@ -769,7 +783,7 @@ static void *test_tc_redirect_run_tests(void *arg) return NULL; } -void test_tc_redirect(void) +void serial_test_tc_redirect(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index d207e968e6b1..265b4fe33ec3 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -109,7 +109,7 @@ static int run_test(int cgroup_fd, int server_fd) return -1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); map_fd = bpf_map__fd(map); err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index 0252f61d611a..97d8a6f84f4a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -43,7 +43,7 @@ static int process_sample(void *ctx, void *data, size_t len) void test_test_ima(void) { char measured_dir_template[] = "/tmp/ima_measuredXXXXXX"; - struct ring_buffer *ringbuf; + struct ring_buffer *ringbuf = NULL; const char *measured_dir; char cmd[256]; @@ -85,5 +85,6 @@ close_clean: err = system(cmd); CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno); close_prog: + ring_buffer__free(ringbuf); ima__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 25f40e1b9967..0f4e49e622cd 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -39,7 +39,8 @@ static int timer(struct timer *timer_skel) return 0; } -void test_timer(void) +/* TODO: use pid filtering */ +void serial_test_timer(void) { struct timer *timer_skel = NULL; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index ced8f6cf347c..949a0617869d 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -52,7 +52,7 @@ static int timer_mim(struct timer_mim *timer_skel) return 0; } -void test_timer_mim(void) +void serial_test_timer_mim(void) { struct timer_mim_reject *timer_reject_skel = NULL; libbpf_print_fn_t old_print_fn = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index fb095e5cd9af..8652d0a46c87 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -void test_tp_attach_query(void) +void serial_test_tp_attach_query(void) { const int num_progs = 3; int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index d39bc00feb45..cade7f12315f 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -8,35 +8,34 @@ #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" -void test_trace_printk(void) +void serial_test_trace_printk(void) { - int err, iter = 0, duration = 0, found = 0; - struct trace_printk__bss *bss; - struct trace_printk *skel; + struct trace_printk_lskel__bss *bss; + int err = 0, iter = 0, found = 0; + struct trace_printk_lskel *skel; char *buf = NULL; FILE *fp = NULL; size_t buflen; - skel = trace_printk__open(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + skel = trace_printk_lskel__open(); + if (!ASSERT_OK_PTR(skel, "trace_printk__open")) return; - ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string"); + ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]"); skel->rodata->fmt[0] = 't'; - err = trace_printk__load(skel); - if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + err = trace_printk_lskel__load(skel); + if (!ASSERT_OK(err, "trace_printk__load")) goto cleanup; bss = skel->bss; - err = trace_printk__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + err = trace_printk_lskel__attach(skel); + if (!ASSERT_OK(err, "trace_printk__attach")) goto cleanup; fp = fopen(TRACEBUF, "r"); - if (CHECK(fp == NULL, "could not open trace buffer", - "error %d opening %s", errno, TRACEBUF)) + if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) goto cleanup; /* We do not want to wait forever if this test fails... */ @@ -44,16 +43,12 @@ void test_trace_printk(void) /* wait for tracepoint to trigger */ usleep(1); - trace_printk__detach(skel); + trace_printk_lskel__detach(skel); - if (CHECK(bss->trace_printk_ran == 0, - "bpf_trace_printk never ran", - "ran == %d", bss->trace_printk_ran)) + if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran")) goto cleanup; - if (CHECK(bss->trace_printk_ret <= 0, - "bpf_trace_printk returned <= 0 value", - "got %d", bss->trace_printk_ret)) + if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret")) goto cleanup; /* verify our search string is in the trace buffer */ @@ -66,12 +61,11 @@ void test_trace_printk(void) break; } - if (CHECK(!found, "message from bpf_trace_printk not found", - "no instance of %s in %s", SEARCHMSG, TRACEBUF)) + if (!ASSERT_EQ(found, bss->trace_printk_ran, "found")) goto cleanup; cleanup: - trace_printk__destroy(skel); + trace_printk_lskel__destroy(skel); free(buf); if (fp) fclose(fp); diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c new file mode 100644 index 000000000000..7a4e313e8558 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> + +#include "trace_vprintk.lskel.h" + +#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" +#define SEARCHMSG "1,2,3,4,5,6,7,8,9,10" + +void serial_test_trace_vprintk(void) +{ + struct trace_vprintk_lskel__bss *bss; + int err = 0, iter = 0, found = 0; + struct trace_vprintk_lskel *skel; + char *buf = NULL; + FILE *fp = NULL; + size_t buflen; + + skel = trace_vprintk_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) + goto cleanup; + + bss = skel->bss; + + err = trace_vprintk_lskel__attach(skel); + if (!ASSERT_OK(err, "trace_vprintk__attach")) + goto cleanup; + + fp = fopen(TRACEBUF, "r"); + if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)")) + goto cleanup; + + /* We do not want to wait forever if this test fails... */ + fcntl(fileno(fp), F_SETFL, O_NONBLOCK); + + /* wait for tracepoint to trigger */ + usleep(1); + trace_vprintk_lskel__detach(skel); + + if (!ASSERT_GT(bss->trace_vprintk_ran, 0, "bss->trace_vprintk_ran")) + goto cleanup; + + if (!ASSERT_GT(bss->trace_vprintk_ret, 0, "bss->trace_vprintk_ret")) + goto cleanup; + + /* verify our search string is in the trace buffer */ + while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) { + if (strstr(buf, SEARCHMSG) != NULL) + found++; + if (found == bss->trace_vprintk_ran) + break; + if (++iter > 1000) + break; + } + + if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found")) + goto cleanup; + + if (!ASSERT_LT(bss->null_data_vprintk_ret, 0, "bss->null_data_vprintk_ret")) + goto cleanup; + +cleanup: + trace_vprintk_lskel__destroy(skel); + free(buf); + if (fp) + fclose(fp); +} diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index d7f5a931d7f3..fc146671b20a 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -41,7 +41,8 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name) return bpf_program__attach_trace(prog); } -void test_trampoline_count(void) +/* TODO: use different target function to run in concurrent mode */ +void serial_test_trampoline_count(void) { const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c new file mode 100644 index 000000000000..a47e7c0e1ffd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> + +#include "trace_vprintk.lskel.h" + +void test_verif_stats(void) +{ + __u32 len = sizeof(struct bpf_prog_info); + struct trace_vprintk_lskel *skel; + struct bpf_prog_info info = {}; + int err; + + skel = trace_vprintk_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) + goto cleanup; + + err = bpf_obj_get_info_by_fd(skel->progs.sys_enter.prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto cleanup; + + if (!ASSERT_GT(info.verified_insns, 0, "verified_insns")) + goto cleanup; + +cleanup: + trace_vprintk_lskel__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index d5c98f2cb12f..f529e3c923ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include <network_helpers.h> -void test_xdp_adjust_tail_shrink(void) +static void test_xdp_adjust_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; __u32 duration, retval, size, expect_sz; @@ -30,7 +30,7 @@ void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; struct bpf_object *obj; @@ -58,7 +58,7 @@ void test_xdp_adjust_tail_grow(void) bpf_object__close(obj); } -void test_xdp_adjust_tail_grow2(void) +static void test_xdp_adjust_tail_grow2(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; char buf[4096]; /* avoid segfault: large buf to hold grow results */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index 15ef3531483e..4c4057262cd8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void test_xdp_attach(void) +void serial_test_xdp_attach(void) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index ad3ba81b4048..faa22b84f2ee 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -519,7 +519,7 @@ static struct bond_test_case bond_test_cases[] = { { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, }; -void test_xdp_bonding(void) +void serial_test_xdp_bonding(void) { libbpf_print_fn_t old_print_fn; struct skeletons skeletons = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 8755effd80b0..fd812bd43600 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -7,7 +7,7 @@ #define IFINDEX_LO 1 -void test_xdp_cpumap_attach(void) +void serial_test_xdp_cpumap_attach(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index c72af030ff10..3079d5568f8f 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -8,7 +8,7 @@ #define IFINDEX_LO 1 -void test_xdp_with_devmap_helpers(void) +static void test_xdp_with_devmap_helpers(void) { struct test_xdp_with_devmap_helpers *skel; struct bpf_prog_info info = {}; @@ -60,7 +60,7 @@ out_close: test_xdp_with_devmap_helpers__destroy(skel); } -void test_neg_xdp_devmap_helpers(void) +static void test_neg_xdp_devmap_helpers(void) { struct test_xdp_devmap_helpers *skel; @@ -72,7 +72,7 @@ void test_neg_xdp_devmap_helpers(void) } -void test_xdp_devmap_attach(void) +void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) test_xdp_with_devmap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index d2d7a283d72f..4e2a4fd56f67 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 -void test_xdp_info(void) +void serial_test_xdp_info(void) { __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id; const char *file = "./xdp_dummy.o"; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 46eed0a33c23..983ab0b47d30 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -6,7 +6,7 @@ #define IFINDEX_LO 1 -void test_xdp_link(void) +void serial_test_xdp_link(void) { __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c new file mode 100644 index 000000000000..f3927829a55a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "test_progs.h" +#include "xdpwall.skel.h" + +void test_xdpwall(void) +{ + struct xdpwall *skel; + + skel = xdpwall__open_and_load(); + ASSERT_OK_PTR(skel, "Does LLMV have https://reviews.llvm.org/D109073?"); + + xdpwall__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c index c245345e41ca..16e57313204a 100644 --- a/tools/testing/selftests/bpf/progs/atomics.c +++ b/tools/testing/selftests/bpf/progs/atomics.c @@ -10,6 +10,8 @@ bool skip_tests __attribute((__section__(".data"))) = false; bool skip_tests = true; #endif +__u32 pid = 0; + __u64 add64_value = 1; __u64 add64_result = 0; __u32 add32_value = 1; @@ -21,6 +23,8 @@ __u64 add_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(add, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 add_stack_value = 1; @@ -45,6 +49,8 @@ __s64 sub_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(sub, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 sub_stack_value = 1; @@ -67,6 +73,8 @@ __u64 and_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(and, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32); @@ -86,6 +94,8 @@ __u64 or_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(or, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32); or32_result = __sync_fetch_and_or(&or32_value, 0x011); @@ -104,6 +114,8 @@ __u64 xor_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(xor, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); @@ -123,6 +135,8 @@ __u32 cmpxchg32_result_succeed = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(cmpxchg, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); @@ -142,6 +156,8 @@ __u32 xchg32_result = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(xchg, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 val64 = 2; __u32 val32 = 2; diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c new file mode 100644 index 000000000000..d9a88dd1ea65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <errno.h> +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +__u8 rand_vals[2500000]; +const __u32 nr_rand_bytes = 2500000; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(__u32)); + /* max entries and value_size will be set programmatically. + * They are configurable from the userspace bench program. + */ +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + /* max entries, value_size, and # of hash functions will be set + * programmatically. They are configurable from the userspace + * bench program. + */ + __uint(map_extra, 3); +} bloom_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + /* max entries, key_size, and value_size, will be set + * programmatically. They are configurable from the userspace + * bench program. + */ +} hashmap SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; + bool update; +}; + +/* Tracks the number of hits, drops, and false hits */ +struct { + __u32 stats[3]; +} __attribute__((__aligned__(256))) percpu_stats[256]; + +const __u32 hit_key = 0; +const __u32 drop_key = 1; +const __u32 false_hit_key = 2; + +__u8 value_size; + +const volatile bool hashmap_use_bloom; +const volatile bool count_false_hits; + +int error = 0; + +static __always_inline void log_result(__u32 key) +{ + __u32 cpu = bpf_get_smp_processor_id(); + + percpu_stats[cpu & 255].stats[key]++; +} + +static __u64 +bloom_callback(struct bpf_map *map, __u32 *key, void *val, + struct callback_ctx *data) +{ + int err; + + if (data->update) + err = bpf_map_push_elem(data->map, val, 0); + else + err = bpf_map_peek_elem(data->map, val); + + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + log_result(hit_key); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_lookup(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = false; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_update(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = true; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_hashmap_lookup(void *ctx) +{ + __u64 *result; + int i, err; + + __u32 index = bpf_get_prandom_u32(); + __u32 bitmask = (1ULL << 21) - 1; + + for (i = 0; i < 1024; i++, index += value_size) { + index = index & bitmask; + + if (hashmap_use_bloom) { + err = bpf_map_peek_elem(&bloom_map, + rand_vals + index); + if (err) { + if (err != -ENOENT) { + error |= 2; + return 0; + } + log_result(hit_key); + continue; + } + } + + result = bpf_map_lookup_elem(&hashmap, + rand_vals + index); + if (result) { + log_result(hit_key); + } else { + if (hashmap_use_bloom && count_false_hits) + log_result(false_hit_key); + log_result(drop_key); + } + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c new file mode 100644 index 000000000000..1316f3db79d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1000); +} map_random_data SEC(".maps"); + +struct map_bloom_type { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + __type(value, __u32); + __uint(max_entries, 10000); + __uint(map_extra, 5); +} map_bloom SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct map_bloom_type); +} outer_map SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; +}; + +int error = 0; + +static __u64 +check_elem(struct bpf_map *map, __u32 *key, __u32 *val, + struct callback_ctx *data) +{ + int err; + + err = bpf_map_peek_elem(data->map, val); + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int inner_map(void *ctx) +{ + struct bpf_map *inner_map; + struct callback_ctx data; + int key = 0; + + inner_map = bpf_map_lookup_elem(&outer_map, &key); + if (!inner_map) { + error |= 2; + return 0; + } + + data.map = inner_map; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int check_bloom(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&map_bloom; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index f62df4d023f9..d9660e7200e2 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -169,11 +169,7 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } -/* "struct_ops/" prefix is not a requirement - * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS - * as long as it is used in one of the func ptr - * under SEC(".struct_ops"). - */ +/* "struct_ops/" prefix is a requirement */ SEC("struct_ops/bpf_cubic_init") void BPF_PROG(bpf_cubic_init, struct sock *sk) { @@ -188,10 +184,8 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -/* No prefix in SEC will also work. - * The remaining tcp-cubic functions have an easier way. - */ -SEC("no-sec-prefix-bictcp_cwnd_event") +/* "struct_ops" prefix is a requirement */ +SEC("struct_ops/bpf_cubic_cwnd_event") void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 95a5a0778ed7..f266c757b3df 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -19,9 +19,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; #define PROG(F) PROG_(F, _##F) -#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME +#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM /* These are the identifiers of the BPF programs that will be used in tail * calls. Name is limited to 16 characters, with the terminating character and diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c index 8f44767a75fa..e5560a656030 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -11,7 +11,7 @@ /* *struct bitfields_only_mixed_types { * int a: 3; - * long int b: 2; + * long b: 2; * _Bool c: 1; * enum { * A = 0, @@ -27,7 +27,7 @@ struct bitfields_only_mixed_types { int a: 3; - long int b: 2; + long b: 2; bool c: 1; /* it's really a _Bool type */ enum { A, /* A = 0, dumper is very explicit */ @@ -44,8 +44,8 @@ struct bitfields_only_mixed_types { * char: 4; * int a: 4; * short b; - * long int c; - * long int d: 8; + * long c; + * long d: 8; * int e; * int f; *}; @@ -71,7 +71,7 @@ struct bitfield_mixed_with_others { *struct bitfield_flushed { * int a: 4; * long: 60; - * long int b: 16; + * long b: 16; *}; * */ diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c index 1cef3bec1dc7..e304b6204bd9 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -29,7 +29,7 @@ struct non_packed_fields { struct nested_packed { char: 4; int a: 4; - long int b; + long b; struct { char c; int d; @@ -44,7 +44,7 @@ union union_is_never_packed { union union_does_not_need_packing { struct { - long int a; + long a; int b; } __attribute__((packed)); int c; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index 35c512818a56..f2661c8d2d90 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -9,7 +9,7 @@ /* ----- START-EXPECTED-OUTPUT ----- */ struct padded_implicitly { int a; - long int b; + long b; char c; }; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 8aaa24a00322..1c7105fcae3c 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -189,7 +189,7 @@ struct struct_with_embedded_stuff { const char *d; } e; union { - volatile long int f; + volatile long f; void * restrict g; }; }; diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c index a25373002055..3f81ff92184c 100644 --- a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c @@ -20,7 +20,7 @@ struct { __u32 invocations = 0; -SEC("cgroup_skb/egress/1") +SEC("cgroup_skb/egress") int egress1(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = @@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb) return 1; } -SEC("cgroup_skb/egress/2") +SEC("cgroup_skb/egress") int egress2(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c index a149f33bc533..d662db27fe4a 100644 --- a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c +++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c @@ -20,7 +20,7 @@ struct { __u32 invocations = 0; -SEC("cgroup_skb/egress/1") +SEC("cgroup_skb/egress") int egress1(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = @@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb) return 1; } -SEC("cgroup_skb/egress/2") +SEC("cgroup_skb/egress") int egress2(struct __sk_buff *skb) { struct cgroup_value *ptr_cg_storage = diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c index 3f757e30d7a0..88638315c582 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -14,7 +14,6 @@ #include <sys/types.h> #include <sys/socket.h> -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; __u16 g_serv_port = 0; diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c index b565d997810a..d3f4c5e4fb69 100644 --- a/tools/testing/selftests/bpf/progs/connect4_dropper.c +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -18,7 +18,7 @@ int connect_v4_dropper(struct bpf_sock_addr *ctx) { if (ctx->type != SOCK_STREAM) return VERDICT_PROCEED; - if (ctx->user_port == bpf_htons(60123)) + if (ctx->user_port == bpf_htons(60120)) return VERDICT_REJECT; return VERDICT_PROCEED; } diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index a943d394fd3a..b241932911db 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -31,8 +31,6 @@ #define IFNAMSIZ 16 #endif -int _version SEC("version") = 1; - __attribute__ ((noinline)) int do_bind(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 506d0f81a375..40266d2c737c 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -24,8 +24,6 @@ #define DST_REWRITE_PORT6 6666 -int _version SEC("version") = 1; - SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c index a979aaef2a76..27a632dd382e 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port4.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -13,7 +13,6 @@ #include <bpf_sockopt_helpers.h> char _license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; struct svc_addr { __be32 addr; diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c index afc8f1c5a9d6..19cad93e612f 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port6.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -12,7 +12,6 @@ #include <bpf_sockopt_helpers.h> char _license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; struct svc_addr { __be32 addr[4]; diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index 8924e06bdef0..79b54a4fa244 100644 --- a/tools/testing/selftests/bpf/progs/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -57,4 +57,3 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops.c b/tools/testing/selftests/bpf/progs/dummy_st_ops.c new file mode 100644 index 000000000000..ead87edb75e2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dummy_st_ops.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct bpf_dummy_ops_state { + int val; +} __attribute__((preserve_access_index)); + +struct bpf_dummy_ops { + int (*test_1)(struct bpf_dummy_ops_state *state); + int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4); +}; + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1, struct bpf_dummy_ops_state *state) +{ + int ret; + + if (!state) + return 0xf2f3f4f5; + + ret = state->val; + state->val = 0x5a; + return ret; +} + +__u64 test_2_args[5]; + +SEC("struct_ops/test_2") +int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4) +{ + test_2_args[0] = (unsigned long)state; + test_2_args[1] = a1; + test_2_args[2] = a2; + test_2_args[3] = a3; + test_2_args[4] = a4; + return 0; +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c index 03a672d76353..bca92c9bd29a 100644 --- a/tools/testing/selftests/bpf/progs/fexit_sleep.c +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -13,7 +13,7 @@ int fexit_cnt = 0; SEC("fentry/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) { - if ((int)bpf_get_current_pid_tgid() != pid) + if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; fentry_cnt++; @@ -23,7 +23,7 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) SEC("fexit/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) { - if ((int)bpf_get_current_pid_tgid() != pid) + if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; fexit_cnt++; diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c index 75e8e1069fe7..df918b2469da 100644 --- a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c +++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c @@ -47,7 +47,7 @@ check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val, u32 arraymap_output = 0; -SEC("classifier") +SEC("tc") int test_pkt_access(struct __sk_buff *skb) { struct callback_ctx data; diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c index 913dd91aafff..276994d5c0c7 100644 --- a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c +++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c @@ -78,7 +78,7 @@ int hashmap_output = 0; int hashmap_elems = 0; int percpu_map_elems = 0; -SEC("classifier") +SEC("tc") int test_pkt_access(struct __sk_buff *skb) { struct callback_ctx data; diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c new file mode 100644 index 000000000000..a1b139888048 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +__u64 test1_hits = 0; +__u64 address_low = 0; +__u64 address_high = 0; +int wasted_entries = 0; +long total_entries = 0; + +#define ENTRY_CNT 32 +struct perf_branch_entry entries[ENTRY_CNT] = {}; + +static inline bool in_range(__u64 val) +{ + return (val >= address_low) && (val < address_high); +} + +SEC("fexit/bpf_testmod_loop_test") +int BPF_PROG(test1, int n, int ret) +{ + long i; + + total_entries = bpf_get_branch_snapshot(entries, sizeof(entries), 0); + total_entries /= sizeof(struct perf_branch_entry); + + for (i = 0; i < ENTRY_CNT; i++) { + if (i >= total_entries) + break; + if (in_range(entries[i].from) && in_range(entries[i].to)) + test1_hits++; + else if (!test1_hits) + wasted_entries++; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 6b42db2fe391..68587b1de34e 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -37,4 +37,3 @@ int trace(void *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 55e283050cab..7236da72ce80 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -9,8 +9,8 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } perf_buf_map SEC(".maps"); #define _(P) (__builtin_preserve_access_index(P)) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 470f8723e463..8a8cf59017aa 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -8,7 +8,7 @@ extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, __u32 c, __u64 d) __ksym; -SEC("classifier") +SEC("tc") int kfunc_call_test2(struct __sk_buff *skb) { struct bpf_sock *sk = skb->sk; @@ -23,7 +23,7 @@ int kfunc_call_test2(struct __sk_buff *skb) return bpf_kfunc_call_test2((struct sock *)sk, 1, 2); } -SEC("classifier") +SEC("tc") int kfunc_call_test1(struct __sk_buff *skb) { struct bpf_sock *sk = skb->sk; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index 5fbd9e232d44..c1fdecabeabf 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -33,7 +33,7 @@ int __noinline f1(struct __sk_buff *skb) return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); } -SEC("classifier") +SEC("tc") int kfunc_call_test1(struct __sk_buff *skb) { return f1(skb); diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index d1d304c980f0..b1b711d9b214 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -683,5 +683,4 @@ int cg_skb(void *ctx) return 1; } -__u32 _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 43649bce4c54..f718b2c212dc 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -68,4 +68,3 @@ int bpf_nextcnt(struct __sk_buff *skb) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c index 25467d13c356..b3fcb5274ee0 100644 --- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c @@ -11,8 +11,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; struct { __uint(type, BPF_MAP_TYPE_STACK_TRACE); __uint(max_entries, 16384); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(stack_trace_t)); + __type(key, __u32); + __type(value, stack_trace_t); } stackmap SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c index 49f679375b9d..3c2423bb19e2 100644 --- a/tools/testing/selftests/bpf/progs/recursion.c +++ b/tools/testing/selftests/bpf/progs/recursion.c @@ -24,8 +24,8 @@ struct { int pass1 = 0; int pass2 = 0; -SEC("fentry/__htab_map_lookup_elem") -int BPF_PROG(on_lookup, struct bpf_map *map) +SEC("fentry/htab_map_delete_elem") +int BPF_PROG(on_delete, struct bpf_map *map) { int key = 0; @@ -35,10 +35,7 @@ int BPF_PROG(on_lookup, struct bpf_map *map) } if (map == (void *)&hash2) { pass2++; - /* htab_map_gen_lookup() will inline below call - * into direct call to __htab_map_lookup_elem() - */ - bpf_map_lookup_elem(&hash2, &key); + bpf_map_delete_elem(&hash2, &key); return 0; } diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index ac5abc34cde8..ea75a44cb7fc 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -18,8 +18,6 @@ #define DST_PORT 4040 #define DST_REWRITE_PORT4 4444 -int _version SEC("version") = 1; - SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index 24694b1a8d82..bf9b46b806f6 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -22,8 +22,6 @@ #define DST_REWRITE_PORT6 6666 -int _version SEC("version") = 1; - SEC("cgroup/sendmsg6") int sendmsg_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c index 7f2eaa2f89f8..992b7861003a 100644 --- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -25,7 +25,7 @@ out: return ip; } -SEC("classifier/cls") +SEC("tc") int main_prog(struct __sk_buff *skb) { struct iphdr *ip = NULL; diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index ca283af80d4e..95d5b941bc1f 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -2,8 +2,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index eeaf6e75c9a2..80632954c5a1 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -3,8 +3,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - SEC("sk_msg1") int bpf_prog1(struct sk_msg_md *msg) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 4797dc985064..e2468a6d01a5 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -2,27 +2,25 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 20); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } sock_map_rx SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 20); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } sock_map_tx SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 20); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } sock_map_msg SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index c6d428a8d785..9fb241b97291 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -3,7 +3,6 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; #define SOL_CUSTOM 0xdeadbeef #define CUSTOM_INHERIT1 0 diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 9d8c212dde9f..177a59069dae 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -4,9 +4,8 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; -SEC("cgroup/getsockopt/child") +SEC("cgroup/getsockopt") int _getsockopt_child(struct bpf_sockopt *ctx) { __u8 *optval_end = ctx->optval_end; @@ -29,7 +28,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx) return 1; } -SEC("cgroup/getsockopt/parent") +SEC("cgroup/getsockopt") int _getsockopt_parent(struct bpf_sockopt *ctx) { __u8 *optval_end = ctx->optval_end; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 7de534f38c3f..60c93aee2f4a 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -358,7 +358,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, void *payload) { void *location; - uint32_t len; + uint64_t len; data->str_lens[idx] = 0; location = calc_location(&cfg->str_locs[idx], tls_base); @@ -390,7 +390,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; void *location; - uint32_t len; + uint64_t len; int i; descr->tag_len = 0; /* presume no tag is set */ diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/tag.c new file mode 100644 index 000000000000..1792f4eda095 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tag.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#if __has_attribute(btf_decl_tag) +#define __tag1 __attribute__((btf_decl_tag("tag1"))) +#define __tag2 __attribute__((btf_decl_tag("tag2"))) +volatile const bool skip_tests __tag1 __tag2 = false; +#else +#define __tag1 +#define __tag2 +volatile const bool skip_tests = true; +#endif + +struct key_t { + int a; + int b __tag1 __tag2; + int c; +} __tag1 __tag2; + +typedef struct { + int a; + int b; +} value_t __tag1 __tag2; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, value_t); +} hashmap1 SEC(".maps"); + + +static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2 +{ + struct key_t key; + value_t val = {}; + + key.a = key.b = key.c = x; + bpf_map_update_elem(&hashmap1, &key, &val, 0); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(sub, int x) +{ + return foo(x); +} diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c index 7115bcefbe8a..8159a0b4a69a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall1.c +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -11,8 +11,8 @@ struct { } jmp_table SEC(".maps"); #define TAIL_FUNC(x) \ - SEC("classifier/" #x) \ - int bpf_func_##x(struct __sk_buff *skb) \ + SEC("tc") \ + int classifier_##x(struct __sk_buff *skb) \ { \ return x; \ } @@ -20,7 +20,7 @@ TAIL_FUNC(0) TAIL_FUNC(1) TAIL_FUNC(2) -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { /* Multiple locations to make sure we patch @@ -45,4 +45,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c index 0431e4fe7efd..a5ff53e61702 100644 --- a/tools/testing/selftests/bpf/progs/tailcall2.c +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -10,41 +10,41 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -SEC("classifier/0") -int bpf_func_0(struct __sk_buff *skb) +SEC("tc") +int classifier_0(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 1); return 0; } -SEC("classifier/1") -int bpf_func_1(struct __sk_buff *skb) +SEC("tc") +int classifier_1(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 2); return 1; } -SEC("classifier/2") -int bpf_func_2(struct __sk_buff *skb) +SEC("tc") +int classifier_2(struct __sk_buff *skb) { return 2; } -SEC("classifier/3") -int bpf_func_3(struct __sk_buff *skb) +SEC("tc") +int classifier_3(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 4); return 3; } -SEC("classifier/4") -int bpf_func_4(struct __sk_buff *skb) +SEC("tc") +int classifier_4(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 3); return 4; } -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 0); @@ -56,4 +56,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index 910858fe078a..f60bcd7b8d4b 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -12,15 +12,15 @@ struct { int count = 0; -SEC("classifier/0") -int bpf_func_0(struct __sk_buff *skb) +SEC("tc") +int classifier_0(struct __sk_buff *skb) { count++; bpf_tail_call_static(skb, &jmp_table, 0); return 1; } -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 0); @@ -28,4 +28,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c index bd4be135c39d..a56bbc2313ca 100644 --- a/tools/testing/selftests/bpf/progs/tailcall4.c +++ b/tools/testing/selftests/bpf/progs/tailcall4.c @@ -13,8 +13,8 @@ struct { int selector = 0; #define TAIL_FUNC(x) \ - SEC("classifier/" #x) \ - int bpf_func_##x(struct __sk_buff *skb) \ + SEC("tc") \ + int classifier_##x(struct __sk_buff *skb) \ { \ return x; \ } @@ -22,7 +22,7 @@ TAIL_FUNC(0) TAIL_FUNC(1) TAIL_FUNC(2) -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { bpf_tail_call(skb, &jmp_table, selector); @@ -30,4 +30,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c index adf30a33064e..8d03496eb6ca 100644 --- a/tools/testing/selftests/bpf/progs/tailcall5.c +++ b/tools/testing/selftests/bpf/progs/tailcall5.c @@ -13,8 +13,8 @@ struct { int selector = 0; #define TAIL_FUNC(x) \ - SEC("classifier/" #x) \ - int bpf_func_##x(struct __sk_buff *skb) \ + SEC("tc") \ + int classifier_##x(struct __sk_buff *skb) \ { \ return x; \ } @@ -22,7 +22,7 @@ TAIL_FUNC(0) TAIL_FUNC(1) TAIL_FUNC(2) -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { int idx = 0; @@ -37,4 +37,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall6.c b/tools/testing/selftests/bpf/progs/tailcall6.c new file mode 100644 index 000000000000..d77b8abd62f3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall6.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +int count, which; + +SEC("tc") +int classifier_0(struct __sk_buff *skb) +{ + count++; + if (__builtin_constant_p(which)) + __bpf_unreachable(); + bpf_tail_call(skb, &jmp_table, which); + return 1; +} + +SEC("tc") +int entry(struct __sk_buff *skb) +{ + if (__builtin_constant_p(which)) + __bpf_unreachable(); + bpf_tail_call(skb, &jmp_table, which); + return 0; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c index 0103f3dd9f02..8c91428deb90 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c @@ -10,8 +10,8 @@ struct { } jmp_table SEC(".maps"); #define TAIL_FUNC(x) \ - SEC("classifier/" #x) \ - int bpf_func_##x(struct __sk_buff *skb) \ + SEC("tc") \ + int classifier_##x(struct __sk_buff *skb) \ { \ return x; \ } @@ -26,7 +26,7 @@ int subprog_tail(struct __sk_buff *skb) return skb->len * 2; } -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 1); @@ -35,4 +35,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c index 3cc4c12817b5..ce97d141daee 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -22,14 +22,14 @@ int subprog_tail(struct __sk_buff *skb) int count = 0; -SEC("classifier/0") -int bpf_func_0(struct __sk_buff *skb) +SEC("tc") +int classifier_0(struct __sk_buff *skb) { count++; return subprog_tail(skb); } -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { bpf_tail_call_static(skb, &jmp_table, 0); @@ -38,4 +38,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c index 0d5482bea6c9..7fab39a3bb12 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -33,23 +33,23 @@ int subprog_tail(struct __sk_buff *skb) return skb->len * 2; } -SEC("classifier/0") -int bpf_func_0(struct __sk_buff *skb) +SEC("tc") +int classifier_0(struct __sk_buff *skb) { volatile char arr[128] = {}; return subprog_tail2(skb); } -SEC("classifier/1") -int bpf_func_1(struct __sk_buff *skb) +SEC("tc") +int classifier_1(struct __sk_buff *skb) { volatile char arr[128] = {}; return skb->len * 3; } -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { volatile char arr[128] = {}; @@ -58,4 +58,3 @@ int entry(struct __sk_buff *skb) } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index e89368a50b97..b67e8022d500 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -50,30 +50,29 @@ int subprog_tail(struct __sk_buff *skb) return skb->len; } -SEC("classifier/1") -int bpf_func_1(struct __sk_buff *skb) +SEC("tc") +int classifier_1(struct __sk_buff *skb) { return subprog_tail_2(skb); } -SEC("classifier/2") -int bpf_func_2(struct __sk_buff *skb) +SEC("tc") +int classifier_2(struct __sk_buff *skb) { count++; return subprog_tail_2(skb); } -SEC("classifier/0") -int bpf_func_0(struct __sk_buff *skb) +SEC("tc") +int classifier_0(struct __sk_buff *skb) { return subprog_tail_1(skb); } -SEC("classifier") +SEC("tc") int entry(struct __sk_buff *skb) { return subprog_tail(skb); } char __license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 0cb3204ddb18..0988d79f1587 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -3,7 +3,6 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; struct tcp_rtt_storage { __u32 invoked; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 31538c9ed193..160ead6c67b2 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -4,8 +4,6 @@ #include <bpf/bpf_helpers.h> #include "bpf_legacy.h" -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index c1e0c8c7c55f..c218cf8989a9 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -21,8 +21,8 @@ struct inner_map_sz2 { struct outer_arr { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 3); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); /* it's possible to use anonymous struct as inner map definition here */ __array(values, struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -61,8 +61,8 @@ struct inner_map_sz4 { struct outer_arr_dyn { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 3); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); __array(values, struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_INNER_MAP); @@ -81,7 +81,7 @@ struct outer_arr_dyn { struct outer_hash { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); __uint(max_entries, 5); - __uint(key_size, sizeof(int)); + __type(key, int); /* Here everything works flawlessly due to reuse of struct inner_map * and compiler will complain at the attempt to use non-inner_map * references below. This is great experience. @@ -111,8 +111,8 @@ struct sockarr_sz2 { struct outer_sockarr_sz1 { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 1); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); __array(values, struct sockarr_sz1); } outer_sockarr SEC(".maps") = { .values = { (void *)&sockarr_sz1 }, diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 6c5560162746..1884a5bd10f5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -4,8 +4,6 @@ #include <bpf/bpf_helpers.h> #include "bpf_legacy.h" -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 506da7fd2da2..15e0f9945fe4 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -3,8 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c index 9a6b85dd52d2..e2bea4da194b 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -145,7 +145,7 @@ release: return TC_ACT_OK; } -SEC("classifier/ingress") +SEC("tc") int cls_ingress(struct __sk_buff *skb) { struct ipv6hdr *ip6h; diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c index 77e47b9e4446..4faba88e45a5 100644 --- a/tools/testing/selftests/bpf/progs/test_cgroup_link.c +++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c @@ -6,14 +6,14 @@ int calls = 0; int alt_calls = 0; -SEC("cgroup_skb/egress1") +SEC("cgroup_skb/egress") int egress(struct __sk_buff *skb) { __sync_fetch_and_add(&calls, 1); return 1; } -SEC("cgroup_skb/egress2") +SEC("cgroup_skb/egress") int egress_alt(struct __sk_buff *skb) { __sync_fetch_and_add(&alt_calls, 1); diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c index 71184af57749..2ec1de11a3ae 100644 --- a/tools/testing/selftests/bpf/progs/test_check_mtu.c +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -153,7 +153,7 @@ int xdp_input_len_exceed(struct xdp_md *ctx) return retval; } -SEC("classifier") +SEC("tc") int tc_use_helper(struct __sk_buff *ctx) { int retval = BPF_OK; /* Expected retval on successful test */ @@ -172,7 +172,7 @@ out: return retval; } -SEC("classifier") +SEC("tc") int tc_exceed_mtu(struct __sk_buff *ctx) { __u32 ifindex = GLOBAL_USER_IFINDEX; @@ -196,7 +196,7 @@ int tc_exceed_mtu(struct __sk_buff *ctx) return retval; } -SEC("classifier") +SEC("tc") int tc_exceed_mtu_da(struct __sk_buff *ctx) { /* SKB Direct-Access variant */ @@ -223,7 +223,7 @@ int tc_exceed_mtu_da(struct __sk_buff *ctx) return retval; } -SEC("classifier") +SEC("tc") int tc_minus_delta(struct __sk_buff *ctx) { int retval = BPF_OK; /* Expected retval on successful test */ @@ -245,7 +245,7 @@ int tc_minus_delta(struct __sk_buff *ctx) return retval; } -SEC("classifier") +SEC("tc") int tc_input_len(struct __sk_buff *ctx) { int retval = BPF_OK; /* Expected retval on successful test */ @@ -265,7 +265,7 @@ int tc_input_len(struct __sk_buff *ctx) return retval; } -SEC("classifier") +SEC("tc") int tc_input_len_exceed(struct __sk_buff *ctx) { int retval = BPF_DROP; /* Fail */ diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index e2a5acc4785c..2833ad722cb7 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -928,7 +928,7 @@ static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) } } -SEC("classifier/cls_redirect") +SEC("tc") int cls_redirect(struct __sk_buff *skb) { metrics_t *metrics = get_global_metrics(); diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c index 8b533db4a7a5..b2ded497572a 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -42,7 +42,16 @@ struct core_reloc_mods { core_reloc_mods_substruct_t h; }; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) +#else +#define CORE_READ(dst, src) ({ \ + int __sz = sizeof(*(dst)) < sizeof(*(src)) ? sizeof(*(dst)) : \ + sizeof(*(src)); \ + bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \ + (const char *)(src) + sizeof(*(src)) - __sz); \ +}) +#endif SEC("raw_tracepoint/sys_enter") int test_core_mods(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c index 01a002ade529..1705097d01d7 100644 --- a/tools/testing/selftests/bpf/progs/test_enable_stats.c +++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c @@ -13,6 +13,6 @@ __u64 count = 0; SEC("raw_tracepoint/sys_enter") int test_enable_stats(void *ctx) { - count += 1; + __sync_fetch_and_add(&count, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c index 1319be1c54ba..719e314ef3e4 100644 --- a/tools/testing/selftests/bpf/progs/test_global_data.c +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -68,7 +68,7 @@ static struct foo struct3 = { bpf_map_update_elem(&result_##map, &key, var, 0); \ } while (0) -SEC("classifier/static_data_load") +SEC("tc") int load_static_data(struct __sk_buff *skb) { static const __u64 bar = ~0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index 880260f6d536..7b42dad187b8 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -38,7 +38,7 @@ int f3(int val, struct __sk_buff *skb, int var) return skb->ifindex * val * var; } -SEC("classifier/test") +SEC("tc") int test_cls(struct __sk_buff *skb) { return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c index 86f0ecb304fc..01bf8275dfd6 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func3.c +++ b/tools/testing/selftests/bpf/progs/test_global_func3.c @@ -54,7 +54,7 @@ int f8(struct __sk_buff *skb) } #endif -SEC("classifier/test") +SEC("tc") int test_cls(struct __sk_buff *skb) { #ifndef NO_FN8 diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c index 260c25b827ef..9248d03e0d06 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func5.c +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -24,7 +24,7 @@ int f3(int val, struct __sk_buff *skb) return skb->ifindex * val; } -SEC("classifier/test") +SEC("tc") int test_cls(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c index 69e19c64e10b..af8c78bdfb25 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func6.c +++ b/tools/testing/selftests/bpf/progs/test_global_func6.c @@ -24,7 +24,7 @@ int f3(int val, struct __sk_buff *skb) return skb->ifindex * val; } -SEC("classifier/test") +SEC("tc") int test_cls(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c index 309b3f6136bd..6cb8e2f5254c 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func7.c +++ b/tools/testing/selftests/bpf/progs/test_global_func7.c @@ -10,7 +10,7 @@ void foo(struct __sk_buff *skb) skb->tc_index = 0; } -SEC("classifier/test") +SEC("tc") int test_cls(struct __sk_buff *skb) { foo(skb); diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c index d6a0b3086b90..0650d918c096 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c @@ -2,24 +2,48 @@ /* Copyright (c) 2021 Facebook */ #include "vmlinux.h" - #include <bpf/bpf_helpers.h> +#define X_0(x) +#define X_1(x) x X_0(x) +#define X_2(x) x X_1(x) +#define X_3(x) x X_2(x) +#define X_4(x) x X_3(x) +#define X_5(x) x X_4(x) +#define X_6(x) x X_5(x) +#define X_7(x) x X_6(x) +#define X_8(x) x X_7(x) +#define X_9(x) x X_8(x) +#define X_10(x) x X_9(x) +#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y) + extern const int bpf_testmod_ksym_percpu __ksym; +extern void bpf_testmod_test_mod_kfunc(int i) __ksym; +extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak; -int out_mod_ksym_global = 0; -bool triggered = false; +int out_bpf_testmod_ksym = 0; +const volatile int x = 0; -SEC("raw_tp/sys_enter") -int handler(const void *ctx) +SEC("tc") +int load(struct __sk_buff *skb) { - int *val; - __u32 cpu; - - val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); - out_mod_ksym_global = *val; - triggered = true; + /* This will be kept by clang, but removed by verifier. Since it is + * marked as __weak, libbpf and gen_loader don't error out if BTF ID + * is not found for it, instead imm and off is set to 0 for it. + */ + if (x) + bpf_testmod_invalid_mod_kfunc(); + bpf_testmod_test_mod_kfunc(42); + out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); + return 0; +} +SEC("tc") +int load_256(struct __sk_buff *skb) +{ + /* this will fail if kfunc doesn't reuse its own btf fd index */ + REPEAT_256(bpf_testmod_test_mod_kfunc(42);); + bpf_testmod_test_mod_kfunc(42); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c index 5f8379aadb29..8eadbd4caf7a 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c @@ -38,7 +38,7 @@ int pass_handler(const void *ctx) /* tests existing symbols. */ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); if (rq) - out__existing_typed = rq->cpu; + out__existing_typed = 0; out__existing_typeless = (__u64)&bpf_prog_active; /* tests non-existent symbols. */ diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 33493911d87a..04fee08863cb 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -21,8 +21,6 @@ #include "test_iptunnel_common.h" #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index 1cfeb940cf9f..f416032ba858 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -9,21 +9,19 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 1); __uint(map_flags, 0); - __uint(key_size, sizeof(__u32)); - /* must be sizeof(__u32) for map in map */ - __uint(value_size, sizeof(__u32)); + __type(key, __u32); + __type(value, __u32); } mim_array SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); __uint(max_entries, 1); __uint(map_flags, 0); - __uint(key_size, sizeof(int)); - /* must be sizeof(__u32) for map in map */ - __uint(value_size, sizeof(__u32)); + __type(key, int); + __type(value, __u32); } mim_hash SEC(".maps"); -SEC("xdp_mimtest") +SEC("xdp") int xdp_mimtest0(struct xdp_md *ctx) { int value = 123; @@ -49,5 +47,4 @@ int xdp_mimtest0(struct xdp_md *ctx) return XDP_PASS; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c index 703c08e06442..9c7d75cf0bd6 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c @@ -13,7 +13,7 @@ struct inner { struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 0); /* This will make map creation to fail */ - __uint(key_size, sizeof(__u32)); + __type(key, __u32); __array(values, struct inner); } mim SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 6077a025092c..2c121c5d66a7 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -293,7 +293,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops) return check_active_hdr_in(skops); } -SEC("sockops/misc_estab") +SEC("sockops") int misc_estab(struct bpf_sock_ops *skops) { int true_val = 1; diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index bd37ceec5587..b36857093f71 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -27,6 +27,20 @@ int BPF_PROG(handle_raw_tp_bare, return 0; } +int raw_tp_writable_bare_in_val = 0; +int raw_tp_writable_bare_early_ret = 0; +int raw_tp_writable_bare_out_val = 0; + +SEC("raw_tp.w/bpf_testmod_test_writable_bare") +int BPF_PROG(handle_raw_tp_writable_bare, + struct bpf_testmod_test_writable_ctx *writable) +{ + raw_tp_writable_bare_in_val = writable->val; + writable->early_ret = raw_tp_writable_bare_early_ret; + writable->val = raw_tp_writable_bare_out_val; + return 0; +} + __u32 tp_btf_read_sz = 0; SEC("tp_btf/bpf_testmod_test_read") diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c index fb22de7c365d..1249a945699f 100644 --- a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c +++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c @@ -7,15 +7,15 @@ struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(max_entries, 1); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } array_1 SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(max_entries, 1); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); __uint(map_flags, BPF_F_PRESERVE_ELEMS); } array_2 SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index 8207a2dc2f9d..17d5b67744d5 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -7,19 +7,35 @@ #include <bpf/bpf_tracing.h> struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} my_pid_map SEC(".maps"); + +struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } perf_buf_map SEC(".maps"); SEC("tp/raw_syscalls/sys_enter") int handle_sys_enter(void *ctx) { + int zero = 0, *my_pid, cur_pid; int cpu = bpf_get_smp_processor_id(); + my_pid = bpf_map_lookup_elem(&my_pid_map, &zero); + if (!my_pid) + return 1; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + if (cur_pid != *my_pid) + return 1; + bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU, &cpu, sizeof(cpu)); - return 0; + return 1; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c index 4ef2630292b2..0facea6cbbae 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning.c +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -3,8 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c index 5412e0c732c7..2a56db1094b8 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c +++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c @@ -3,8 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 852051064507..0558544e1ff0 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -15,7 +15,6 @@ #include <bpf/bpf_endian.h> #define barrier() __asm__ __volatile__("": : :"memory") -int _version SEC("version") = 1; /* llvm will optimize both subprograms into exactly the same BPF assembly * @@ -97,7 +96,7 @@ int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) return 0; } -SEC("classifier/test_pkt_access") +SEC("tc") int test_pkt_access(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c index 610c74ea9f64..d1839366f3e1 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c @@ -7,8 +7,6 @@ #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define TEST_FIELD(TYPE, FIELD, MASK) \ { \ @@ -27,7 +25,7 @@ int _version SEC("version") = 1; } #endif -SEC("classifier/test_pkt_md_access") +SEC("tc") int test_pkt_md_access(struct __sk_buff *skb) { TEST_FIELD(__u8, len, 0xFF); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 89b3532ccc75..8812a90da4eb 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -8,13 +8,37 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#if defined(__TARGET_ARCH_x86) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__x64_" +#elif defined(__TARGET_ARCH_s390) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__s390x_" +#elif defined(__TARGET_ARCH_arm64) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__arm64_" +#else +#define SYSCALL_WRAPPER 0 +#define SYS_PREFIX "" +#endif + static struct sockaddr_in old; -SEC("kprobe/__sys_connect") +SEC("kprobe/" SYS_PREFIX "sys_connect") int BPF_KPROBE(handle_sys_connect) { - void *ptr = (void *)PT_REGS_PARM2(ctx); +#if SYSCALL_WRAPPER == 1 + struct pt_regs *real_regs; +#endif struct sockaddr_in new; + void *ptr; + +#if SYSCALL_WRAPPER == 0 + ptr = (void *)PT_REGS_PARM2(ctx); +#else + real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx); + bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs)); +#endif bpf_probe_read_user(&old, sizeof(old), ptr); __builtin_memset(&new, 0xab, sizeof(new)); diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h index 4dd9806ad73b..0fcd3ff0e38a 100644 --- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h +++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h @@ -8,8 +8,6 @@ #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct { __uint(type, MAP_TYPE); __uint(max_entries, 32); diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 26e77dcc7e91..7d56ed47cd4d 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -15,8 +15,6 @@ #include <bpf/bpf_helpers.h> #include "test_select_reuseport_common.h" -int _version SEC("version") = 1; - #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif @@ -24,8 +22,8 @@ int _version SEC("version") = 1; struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 1); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __type(key, __u32); + __type(value, __u32); } outer_map SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 1ecd987005d2..02f79356d5eb 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -36,7 +36,6 @@ struct { .pinning = PIN_GLOBAL_NS, }; -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ @@ -159,7 +158,7 @@ assign: return ret; } -SEC("classifier/sk_assign_test") +SEC("tc") int bpf_sk_assign_test(struct __sk_buff *skb) { struct bpf_sock_tuple *tuple, ln = {0}; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index ac6f7f205e25..19d2465d9442 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -72,32 +72,32 @@ static const __u16 DST_PORT = 7007; /* Host byte order */ static const __u32 DST_IP4 = IP4(127, 0, 0, 1); static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); -SEC("sk_lookup/lookup_pass") +SEC("sk_lookup") int lookup_pass(struct bpf_sk_lookup *ctx) { return SK_PASS; } -SEC("sk_lookup/lookup_drop") +SEC("sk_lookup") int lookup_drop(struct bpf_sk_lookup *ctx) { return SK_DROP; } -SEC("sk_reuseport/reuse_pass") +SEC("sk_reuseport") int reuseport_pass(struct sk_reuseport_md *ctx) { return SK_PASS; } -SEC("sk_reuseport/reuse_drop") +SEC("sk_reuseport") int reuseport_drop(struct sk_reuseport_md *ctx) { return SK_DROP; } /* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */ -SEC("sk_lookup/redir_port") +SEC("sk_lookup") int redir_port(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -116,7 +116,7 @@ int redir_port(struct bpf_sk_lookup *ctx) } /* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */ -SEC("sk_lookup/redir_ip4") +SEC("sk_lookup") int redir_ip4(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -139,7 +139,7 @@ int redir_ip4(struct bpf_sk_lookup *ctx) } /* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */ -SEC("sk_lookup/redir_ip6") +SEC("sk_lookup") int redir_ip6(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -164,7 +164,7 @@ int redir_ip6(struct bpf_sk_lookup *ctx) return err ? SK_DROP : SK_PASS; } -SEC("sk_lookup/select_sock_a") +SEC("sk_lookup") int select_sock_a(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -179,7 +179,7 @@ int select_sock_a(struct bpf_sk_lookup *ctx) return err ? SK_DROP : SK_PASS; } -SEC("sk_lookup/select_sock_a_no_reuseport") +SEC("sk_lookup") int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -194,7 +194,7 @@ int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx) return err ? SK_DROP : SK_PASS; } -SEC("sk_reuseport/select_sock_b") +SEC("sk_reuseport") int select_sock_b(struct sk_reuseport_md *ctx) { __u32 key = KEY_SERVER_B; @@ -205,7 +205,7 @@ int select_sock_b(struct sk_reuseport_md *ctx) } /* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */ -SEC("sk_lookup/sk_assign_eexist") +SEC("sk_lookup") int sk_assign_eexist(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -238,7 +238,7 @@ out: } /* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */ -SEC("sk_lookup/sk_assign_replace_flag") +SEC("sk_lookup") int sk_assign_replace_flag(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -270,7 +270,7 @@ out: } /* Check that bpf_sk_assign(sk=NULL) is accepted. */ -SEC("sk_lookup/sk_assign_null") +SEC("sk_lookup") int sk_assign_null(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk = NULL; @@ -313,7 +313,7 @@ out: } /* Check that selected sk is accessible through context. */ -SEC("sk_lookup/access_ctx_sk") +SEC("sk_lookup") int access_ctx_sk(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk1 = NULL, *sk2 = NULL; @@ -379,7 +379,7 @@ out: * are not covered because they give bogus results, that is the * verifier ignores the offset. */ -SEC("sk_lookup/ctx_narrow_access") +SEC("sk_lookup") int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -553,7 +553,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) } /* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */ -SEC("sk_lookup/sk_assign_esocknosupport") +SEC("sk_lookup") int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; @@ -578,28 +578,28 @@ out: return ret; } -SEC("sk_lookup/multi_prog_pass1") +SEC("sk_lookup") int multi_prog_pass1(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_PASS; } -SEC("sk_lookup/multi_prog_pass2") +SEC("sk_lookup") int multi_prog_pass2(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); return SK_PASS; } -SEC("sk_lookup/multi_prog_drop1") +SEC("sk_lookup") int multi_prog_drop1(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); return SK_DROP; } -SEC("sk_lookup/multi_prog_drop2") +SEC("sk_lookup") int multi_prog_drop2(struct bpf_sk_lookup *ctx) { bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); @@ -623,7 +623,7 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) return SK_PASS; } -SEC("sk_lookup/multi_prog_redir1") +SEC("sk_lookup") int multi_prog_redir1(struct bpf_sk_lookup *ctx) { int ret; @@ -633,7 +633,7 @@ int multi_prog_redir1(struct bpf_sk_lookup *ctx) return SK_PASS; } -SEC("sk_lookup/multi_prog_redir2") +SEC("sk_lookup") int multi_prog_redir2(struct bpf_sk_lookup *ctx) { int ret; @@ -644,4 +644,3 @@ int multi_prog_redir2(struct bpf_sk_lookup *ctx) } char _license[] SEC("license") = "Dual BSD/GPL"; -__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index 8249075f088f..40f161480a2f 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -15,7 +15,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ @@ -53,8 +52,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, return result; } -SEC("classifier/sk_lookup_success") -int bpf_sk_lookup_test0(struct __sk_buff *skb) +SEC("tc") +int sk_lookup_success(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -79,8 +78,8 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return sk ? TC_ACT_OK : TC_ACT_UNSPEC; } -SEC("classifier/sk_lookup_success_simple") -int bpf_sk_lookup_test1(struct __sk_buff *skb) +SEC("tc") +int sk_lookup_success_simple(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; @@ -91,8 +90,8 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) return 0; } -SEC("classifier/err_use_after_free") -int bpf_sk_lookup_uaf(struct __sk_buff *skb) +SEC("tc") +int err_use_after_free(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; @@ -106,8 +105,8 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) return family; } -SEC("classifier/err_modify_sk_pointer") -int bpf_sk_lookup_modptr(struct __sk_buff *skb) +SEC("tc") +int err_modify_sk_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; @@ -121,8 +120,8 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) return 0; } -SEC("classifier/err_modify_sk_or_null_pointer") -int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) +SEC("tc") +int err_modify_sk_or_null_pointer(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; @@ -135,8 +134,8 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) return 0; } -SEC("classifier/err_no_release") -int bpf_sk_lookup_test2(struct __sk_buff *skb) +SEC("tc") +int err_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -144,8 +143,8 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) return 0; } -SEC("classifier/err_release_twice") -int bpf_sk_lookup_test3(struct __sk_buff *skb) +SEC("tc") +int err_release_twice(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; @@ -156,8 +155,8 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) return 0; } -SEC("classifier/err_release_unchecked") -int bpf_sk_lookup_test4(struct __sk_buff *skb) +SEC("tc") +int err_release_unchecked(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; @@ -173,8 +172,8 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("classifier/err_no_release_subcall") -int bpf_sk_lookup_test5(struct __sk_buff *skb) +SEC("tc") +int err_no_release_subcall(struct __sk_buff *skb) { lookup_no_release(skb); return 0; diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index 552f2090665c..c304cd5b8cad 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -42,6 +42,4 @@ int log_cgroup_id(struct __sk_buff *skb) return TC_ACT_OK; } -int _version SEC("version") = 1; - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index b02ea589ce7e..1d61b36e6067 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -3,7 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; SEC("skb_ctx") @@ -25,6 +24,12 @@ int process(struct __sk_buff *skb) return 1; if (skb->gso_size != 10) return 1; + if (skb->ingress_ifindex != 11) + return 1; + if (skb->ifindex != 1) + return 1; + if (skb->hwtstamp != 11) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c index bb3fbf1a29e3..507215791c5b 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c @@ -14,7 +14,7 @@ struct { char _license[] SEC("license") = "GPL"; -SEC("classifier/test_skb_helpers") +SEC("tc") int test_skb_helpers(struct __sk_buff *skb) { struct task_struct *task; diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c new file mode 100644 index 000000000000..a408ec95cba4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +const volatile pid_t my_pid = 0; +char path[256] = {}; + +SEC("fentry/unix_listen") +int BPF_PROG(unix_listen, struct socket *sock, int backlog) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + struct unix_sock *unix_sk; + int i, len; + + if (pid != my_pid) + return 0; + + unix_sk = (struct unix_sock *)bpf_skc_to_unix_sock(sock->sk); + if (!unix_sk) + return 0; + + if (!UNIX_ABSTRACT(unix_sk)) + return 0; + + len = unix_sk->addr->len - sizeof(short); + path[0] = '@'; + for (i = 1; i < len; i++) { + if (i >= sizeof(struct sockaddr_un)) + break; + + path[i] = unix_sk->addr->name->sun_path[i]; + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 441fa1c552c8..1b1187d2967b 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -5,6 +5,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#define __read_mostly SEC(".data.read_mostly") + struct s { int a; long long b; @@ -40,9 +42,20 @@ int kern_ver = 0; struct s out5 = {}; + +const volatile int in_dynarr_sz SEC(".rodata.dyn"); +const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 }; + +int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; + +int read_mostly_var __read_mostly; +int out_mostly_var; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { + int i; + out1 = in1; out2 = in2; out3 = in3; @@ -53,6 +66,11 @@ int handler(const void *ctx) bpf_syscall = CONFIG_BPF_SYSCALL; kern_ver = LINUX_KERNEL_VERSION; + for (i = 0; i < in_dynarr_sz; i++) + out_dynarr[i] = in_dynarr[i]; + + out_mostly_var = read_mostly_var; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 1858435de7aa..2966564b8497 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -361,5 +361,4 @@ int bpf_prog10(struct sk_msg_md *msg) return SK_DROP; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index a1cc58b10c7c..325c9f193432 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -56,7 +56,7 @@ int prog_stream_verdict(struct __sk_buff *skb) return verdict; } -SEC("sk_skb/skb_verdict") +SEC("sk_skb") int prog_skb_verdict(struct __sk_buff *skb) { unsigned int *count; @@ -116,5 +116,4 @@ int prog_reuseport(struct sk_reuseport_md *reuse) return verdict; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c index 2d31f66e4f23..3c69aa971738 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c @@ -9,7 +9,7 @@ struct { __type(value, __u64); } sock_map SEC(".maps"); -SEC("sk_skb/skb_verdict") +SEC("sk_skb") int prog_skb_verdict(struct __sk_buff *skb) { return SK_DROP; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c index 9d0c9f28cab2..6d64ea536e3d 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_update.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c @@ -24,7 +24,7 @@ struct { __type(value, __u64); } dst_sock_hash SEC(".maps"); -SEC("classifier/copy_sock_map") +SEC("tc") int copy_sock_map(void *ctx) { struct bpf_sock *sk; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index 0cf0134631b4..36a707e7c7a7 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -28,8 +28,8 @@ struct { __uint(type, BPF_MAP_TYPE_STACK_TRACE); __uint(max_entries, 128); __uint(map_flags, BPF_F_STACK_BUILD_ID); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(stack_trace_t)); + __type(key, __u32); + __type(value, stack_trace_t); } stackmap SEC(".maps"); struct { @@ -73,4 +73,3 @@ int oncpu(struct random_urandom_args *args) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 00ed48672620..a8233e7f173b 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -27,8 +27,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; struct { __uint(type, BPF_MAP_TYPE_STACK_TRACE); __uint(max_entries, 16384); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(stack_trace_t)); + __type(key, __u32); + __type(value, stack_trace_t); } stackmap SEC(".maps"); struct { diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c index 18a3a7ed924a..d28ca8d1f3d0 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c +++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c @@ -5,7 +5,7 @@ /* Dummy prog to test TC-BPF API */ -SEC("classifier") +SEC("tc") int cls(struct __sk_buff *skb) { return 0; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 0c93d326a663..3e32ea375ab4 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -70,7 +70,7 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -SEC("classifier/chk_egress") +SEC("tc") int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); @@ -83,7 +83,7 @@ int tc_chk(struct __sk_buff *skb) return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("classifier/dst_ingress") +SEC("tc") int tc_dst(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; @@ -108,7 +108,7 @@ int tc_dst(struct __sk_buff *skb) return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0); } -SEC("classifier/src_ingress") +SEC("tc") int tc_src(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c index f7ab69cf018e..ec4cce19362d 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -75,7 +75,7 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, return 0; } -SEC("classifier/chk_egress") +SEC("tc") int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); @@ -143,13 +143,13 @@ static __always_inline int tc_redir(struct __sk_buff *skb) /* these are identical, but keep them separate for compatibility with the * section names expected by test_tc_redirect.sh */ -SEC("classifier/dst_ingress") +SEC("tc") int tc_dst(struct __sk_buff *skb) { return tc_redir(skb); } -SEC("classifier/src_ingress") +SEC("tc") int tc_src(struct __sk_buff *skb) { return tc_redir(skb); diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index fe818cd5f010..365eacb5dc34 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -16,31 +16,31 @@ volatile const __u32 IFINDEX_DST; static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}; -SEC("classifier/chk_egress") +SEC("tc") int tc_chk(struct __sk_buff *skb) { return TC_ACT_SHOT; } -SEC("classifier/dst_ingress") +SEC("tc") int tc_dst(struct __sk_buff *skb) { return bpf_redirect_peer(IFINDEX_SRC, 0); } -SEC("classifier/src_ingress") +SEC("tc") int tc_src(struct __sk_buff *skb) { return bpf_redirect_peer(IFINDEX_DST, 0); } -SEC("classifier/dst_ingress_l3") +SEC("tc") int tc_dst_l3(struct __sk_buff *skb) { return bpf_redirect(IFINDEX_SRC, 0); } -SEC("classifier/src_ingress_l3") +SEC("tc") int tc_src_l3(struct __sk_buff *skb) { __u16 proto = skb->protocol; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index 47cbe2eeae43..cd747cd93dbe 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -148,7 +148,7 @@ release: bpf_sk_release(sk); } -SEC("clsact/check_syncookie") +SEC("tc") int check_syncookie_clsact(struct __sk_buff *skb) { check_syncookie(skb, (void *)(long)skb->data, @@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("xdp/check_syncookie") +SEC("xdp") int check_syncookie_xdp(struct xdp_md *ctx) { check_syncookie(ctx, (void *)(long)ctx->data, diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index adc83a54c352..2c5c602c6011 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -255,4 +255,3 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c index 678bd0fad29e..5f4e87ee949a 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -594,7 +594,7 @@ static int handle_parse_hdr(struct bpf_sock_ops *skops) return CG_OK; } -SEC("sockops/estab") +SEC("sockops") int estab(struct bpf_sock_ops *skops) { int true_val = 1; diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 94f50f7e94d6..3ded05280757 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -16,7 +16,6 @@ #include "test_tcpbpf.h" struct tcpbpf_globals global = {}; -int _version SEC("version") = 1; /** * SOL_TCP is defined in <netinet/tcp.h> while diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index ac63410bb541..540181c115a8 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -24,12 +24,10 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(max_entries, 2); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(__u32)); + __type(key, int); + __type(value, __u32); } perf_event_map SEC(".maps"); -int _version SEC("version") = 1; - SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 4b825ee122cf..ce6974016f53 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -23,4 +23,3 @@ int oncpu(struct sched_switch_args *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index e7b673117436..ef0dde83b85a 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -26,8 +26,6 @@ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ } while (0) -int _version SEC("version") = 1; - struct geneve_opt { __be16 opt_class; __u8 type; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index 31f9bce37491..d7a9a74b7245 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -20,8 +20,6 @@ #include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); @@ -210,7 +208,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp) return XDP_TX; } -SEC("xdp_tx_iptunnel") +SEC("xdp") int _xdp_tx_iptunnel(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 3d66599eee2e..199c61b7d062 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -2,7 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -SEC("xdp_adjust_tail_grow") +SEC("xdp") int _xdp_adjust_tail_grow(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c index 22065a9cfb25..b7448253d135 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c @@ -9,9 +9,7 @@ #include <linux/if_ether.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - -SEC("xdp_adjust_tail_shrink") +SEC("xdp") int _xdp_adjust_tail_shrink(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index a038e827f850..58cf4345f5cc 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -36,8 +36,8 @@ struct meta { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __type(key, int); + __type(value, int); } perf_buf_map SEC(".maps"); __u64 test_result_fentry = 0; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c index b360ba2bd441..807bf895f42c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -SEC("xdp_dm_log") +SEC("xdp") int xdpdm_devlog(struct xdp_md *ctx) { char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c index eb93ea95d1d8..ee7d6ac0f615 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_link.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -5,7 +5,7 @@ char LICENSE[] SEC("license") = "GPL"; -SEC("xdp/handler") +SEC("xdp") int xdp_handler(struct xdp_md *xdp) { return 0; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index fcabcda30ba3..c98fb44156f0 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -16,8 +16,6 @@ #include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); @@ -206,7 +204,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp) return XDP_TX; } -SEC("xdp_tx_iptunnel") +SEC("xdp") int _xdp_tx_iptunnel(struct xdp_md *xdp) { void *data_end = (void *)(long)xdp->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 3a67921f62b5..596c4e71bf3a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -797,7 +797,7 @@ out: return XDP_DROP; } -SEC("xdp-test-v4") +SEC("xdp") int balancer_ingress_v4(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; @@ -816,7 +816,7 @@ int balancer_ingress_v4(struct xdp_md *ctx) return XDP_DROP; } -SEC("xdp-test-v6") +SEC("xdp") int balancer_ingress_v6(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index a5337cd9400b..b778cad45485 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -12,8 +12,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - SEC("redirect_to_111") int xdp_redirect_to_111(struct xdp_md *xdp) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 59ee4f182ff8..532025057711 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -12,13 +12,13 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); -SEC("xdp_redir") +SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { return bpf_redirect_map(&cpu_map, 1, 0); } -SEC("xdp_dummy") +SEC("xdp") int xdp_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 0ac086497722..1e6b9c38ea6d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -9,7 +9,7 @@ struct { __uint(max_entries, 4); } dm_ports SEC(".maps"); -SEC("xdp_redir") +SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { return bpf_redirect_map(&dm_ports, 1, 0); @@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx) /* invalid program on DEVMAP entry; * SEC name means expected attach type not set */ -SEC("xdp_dummy") +SEC("xdp") int xdp_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c new file mode 100644 index 000000000000..d327241ba047 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int null_data_vprintk_ret = 0; +int trace_vprintk_ret = 0; +int trace_vprintk_ran = 0; + +SEC("fentry/__x64_sys_nanosleep") +int sys_enter(void *ctx) +{ + static const char one[] = "1"; + static const char three[] = "3"; + static const char five[] = "5"; + static const char seven[] = "7"; + static const char nine[] = "9"; + static const char f[] = "%pS\n"; + + /* runner doesn't search for \t, just ensure it compiles */ + bpf_printk("\t"); + + trace_vprintk_ret = __bpf_vprintk("%s,%d,%s,%d,%s,%d,%s,%d,%s,%d %d\n", + one, 2, three, 4, five, 6, seven, 8, nine, 10, ++trace_vprintk_ran); + + /* non-NULL fmt w/ NULL data should result in error */ + null_data_vprintk_ret = bpf_trace_vprintk(f, sizeof(f), NULL, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/twfw.c b/tools/testing/selftests/bpf/progs/twfw.c new file mode 100644 index 000000000000..de1b18a62b46 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/twfw.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <linux/bpf.h> +#include <stdint.h> + +#define TWFW_MAX_TIERS (64) +/* + * load is successful + * #define TWFW_MAX_TIERS (64u)$ + */ + +struct twfw_tier_value { + unsigned long mask[1]; +}; + +struct rule { + uint8_t seqnum; +}; + +struct rules_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, struct rule); + __uint(max_entries, 1); +}; + +struct tiers_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, struct twfw_tier_value); + __uint(max_entries, 1); +}; + +struct rules_map rules SEC(".maps"); +struct tiers_map tiers SEC(".maps"); + +SEC("cgroup_skb/ingress") +int twfw_verifier(struct __sk_buff* skb) +{ + const uint32_t key = 0; + const struct twfw_tier_value* tier = bpf_map_lookup_elem(&tiers, &key); + if (!tier) + return 1; + + struct rule* rule = bpf_map_lookup_elem(&rules, &key); + if (!rule) + return 1; + + if (rule && rule->seqnum < TWFW_MAX_TIERS) { + /* rule->seqnum / 64 should always be 0 */ + unsigned long mask = tier->mask[rule->seqnum / 64]; + if (mask) + return 0; + } + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c index ea25e8881992..d988b2e0cee8 100644 --- a/tools/testing/selftests/bpf/progs/xdp_dummy.c +++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c @@ -4,7 +4,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -SEC("xdp_dummy") +SEC("xdp") int xdp_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c index 880debcbcd65..8395782b6e0a 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -34,7 +34,7 @@ struct { __uint(max_entries, 128); } mac_map SEC(".maps"); -SEC("xdp_redirect_map_multi") +SEC("xdp") int xdp_redirect_map_multi_prog(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; @@ -63,7 +63,7 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx) } /* The following 2 progs are for 2nd devmap prog testing */ -SEC("xdp_redirect_map_ingress") +SEC("xdp") int xdp_redirect_map_all_prog(struct xdp_md *ctx) { return bpf_redirect_map(&map_egress, 0, diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 6b9ca40bd1f4..4ad73847b8a5 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -86,7 +86,7 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type) return XDP_TX; } -SEC("xdpclient") +SEC("xdp") int xdping_client(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; @@ -150,7 +150,7 @@ int xdping_client(struct xdp_md *ctx) return XDP_TX; } -SEC("xdpserver") +SEC("xdp") int xdping_server(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c new file mode 100644 index 000000000000..7a891a0c3a39 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdpwall.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <stdbool.h> +#include <stdint.h> +#include <linux/stddef.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +enum pkt_parse_err { + NO_ERR, + BAD_IP6_HDR, + BAD_IP4GUE_HDR, + BAD_IP6GUE_HDR, +}; + +enum pkt_flag { + TUNNEL = 0x1, + TCP_SYN = 0x2, + QUIC_INITIAL_FLAG = 0x4, + TCP_ACK = 0x8, + TCP_RST = 0x10 +}; + +struct v4_lpm_key { + __u32 prefixlen; + __u32 src; +}; + +struct v4_lpm_val { + struct v4_lpm_key key; + __u8 val; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16); + __type(key, struct in6_addr); + __type(value, bool); +} v6_addr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16); + __type(key, __u32); + __type(value, bool); +} v4_addr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(max_entries, 16); + __uint(key_size, sizeof(struct v4_lpm_key)); + __uint(value_size, sizeof(struct v4_lpm_val)); + __uint(map_flags, BPF_F_NO_PREALLOC); +} v4_lpm_val_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 16); + __type(key, int); + __type(value, __u8); +} tcp_port_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 16); + __type(key, int); + __type(value, __u16); +} udp_port_map SEC(".maps"); + +enum ip_type { V4 = 1, V6 = 2 }; + +struct fw_match_info { + __u8 v4_src_ip_match; + __u8 v6_src_ip_match; + __u8 v4_src_prefix_match; + __u8 v4_dst_prefix_match; + __u8 tcp_dp_match; + __u16 udp_sp_match; + __u16 udp_dp_match; + bool is_tcp; + bool is_tcp_syn; +}; + +struct pkt_info { + enum ip_type type; + union { + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + } ip; + int sport; + int dport; + __u16 trans_hdr_offset; + __u8 proto; + __u8 flags; +}; + +static __always_inline struct ethhdr *parse_ethhdr(void *data, void *data_end) +{ + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return NULL; + + return eth; +} + +static __always_inline __u8 filter_ipv6_addr(const struct in6_addr *ipv6addr) +{ + __u8 *leaf; + + leaf = bpf_map_lookup_elem(&v6_addr_map, ipv6addr); + + return leaf ? *leaf : 0; +} + +static __always_inline __u8 filter_ipv4_addr(const __u32 ipaddr) +{ + __u8 *leaf; + + leaf = bpf_map_lookup_elem(&v4_addr_map, &ipaddr); + + return leaf ? *leaf : 0; +} + +static __always_inline __u8 filter_ipv4_lpm(const __u32 ipaddr) +{ + struct v4_lpm_key v4_key = {}; + struct v4_lpm_val *lpm_val; + + v4_key.src = ipaddr; + v4_key.prefixlen = 32; + + lpm_val = bpf_map_lookup_elem(&v4_lpm_val_map, &v4_key); + + return lpm_val ? lpm_val->val : 0; +} + + +static __always_inline void +filter_src_dst_ip(struct pkt_info* info, struct fw_match_info* match_info) +{ + if (info->type == V6) { + match_info->v6_src_ip_match = + filter_ipv6_addr(&info->ip.ipv6->saddr); + } else if (info->type == V4) { + match_info->v4_src_ip_match = + filter_ipv4_addr(info->ip.ipv4->saddr); + match_info->v4_src_prefix_match = + filter_ipv4_lpm(info->ip.ipv4->saddr); + match_info->v4_dst_prefix_match = + filter_ipv4_lpm(info->ip.ipv4->daddr); + } +} + +static __always_inline void * +get_transport_hdr(__u16 offset, void *data, void *data_end) +{ + if (offset > 255 || data + offset > data_end) + return NULL; + + return data + offset; +} + +static __always_inline bool tcphdr_only_contains_flag(struct tcphdr *tcp, + __u32 FLAG) +{ + return (tcp_flag_word(tcp) & + (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN)) == FLAG; +} + +static __always_inline void set_tcp_flags(struct pkt_info *info, + struct tcphdr *tcp) { + if (tcphdr_only_contains_flag(tcp, TCP_FLAG_SYN)) + info->flags |= TCP_SYN; + else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_ACK)) + info->flags |= TCP_ACK; + else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_RST)) + info->flags |= TCP_RST; +} + +static __always_inline bool +parse_tcp(struct pkt_info *info, void *transport_hdr, void *data_end) +{ + struct tcphdr *tcp = transport_hdr; + + if (tcp + 1 > data_end) + return false; + + info->sport = bpf_ntohs(tcp->source); + info->dport = bpf_ntohs(tcp->dest); + set_tcp_flags(info, tcp); + + return true; +} + +static __always_inline bool +parse_udp(struct pkt_info *info, void *transport_hdr, void *data_end) +{ + struct udphdr *udp = transport_hdr; + + if (udp + 1 > data_end) + return false; + + info->sport = bpf_ntohs(udp->source); + info->dport = bpf_ntohs(udp->dest); + + return true; +} + +static __always_inline __u8 filter_tcp_port(int port) +{ + __u8 *leaf = bpf_map_lookup_elem(&tcp_port_map, &port); + + return leaf ? *leaf : 0; +} + +static __always_inline __u16 filter_udp_port(int port) +{ + __u16 *leaf = bpf_map_lookup_elem(&udp_port_map, &port); + + return leaf ? *leaf : 0; +} + +static __always_inline bool +filter_transport_hdr(void *transport_hdr, void *data_end, + struct pkt_info *info, struct fw_match_info *match_info) +{ + if (info->proto == IPPROTO_TCP) { + if (!parse_tcp(info, transport_hdr, data_end)) + return false; + + match_info->is_tcp = true; + match_info->is_tcp_syn = (info->flags & TCP_SYN) > 0; + + match_info->tcp_dp_match = filter_tcp_port(info->dport); + } else if (info->proto == IPPROTO_UDP) { + if (!parse_udp(info, transport_hdr, data_end)) + return false; + + match_info->udp_dp_match = filter_udp_port(info->dport); + match_info->udp_sp_match = filter_udp_port(info->sport); + } + + return true; +} + +static __always_inline __u8 +parse_gue_v6(struct pkt_info *info, struct ipv6hdr *ip6h, void *data_end) +{ + struct udphdr *udp = (struct udphdr *)(ip6h + 1); + void *encap_data = udp + 1; + + if (udp + 1 > data_end) + return BAD_IP6_HDR; + + if (udp->dest != bpf_htons(6666)) + return NO_ERR; + + info->flags |= TUNNEL; + + if (encap_data + 1 > data_end) + return BAD_IP6GUE_HDR; + + if (*(__u8 *)encap_data & 0x30) { + struct ipv6hdr *inner_ip6h = encap_data; + + if (inner_ip6h + 1 > data_end) + return BAD_IP6GUE_HDR; + + info->type = V6; + info->proto = inner_ip6h->nexthdr; + info->ip.ipv6 = inner_ip6h; + info->trans_hdr_offset += sizeof(struct ipv6hdr) + sizeof(struct udphdr); + } else { + struct iphdr *inner_ip4h = encap_data; + + if (inner_ip4h + 1 > data_end) + return BAD_IP6GUE_HDR; + + info->type = V4; + info->proto = inner_ip4h->protocol; + info->ip.ipv4 = inner_ip4h; + info->trans_hdr_offset += sizeof(struct iphdr) + sizeof(struct udphdr); + } + + return NO_ERR; +} + +static __always_inline __u8 parse_ipv6_gue(struct pkt_info *info, + void *data, void *data_end) +{ + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + + if (ip6h + 1 > data_end) + return BAD_IP6_HDR; + + info->proto = ip6h->nexthdr; + info->ip.ipv6 = ip6h; + info->type = V6; + info->trans_hdr_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + + if (info->proto == IPPROTO_UDP) + return parse_gue_v6(info, ip6h, data_end); + + return NO_ERR; +} + +SEC("xdp") +int edgewall(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)(ctx->data_end); + void *data = (void *)(long)(ctx->data); + struct fw_match_info match_info = {}; + struct pkt_info info = {}; + __u8 parse_err = NO_ERR; + void *transport_hdr; + struct ethhdr *eth; + bool filter_res; + __u32 proto; + + eth = parse_ethhdr(data, data_end); + if (!eth) + return XDP_DROP; + + proto = eth->h_proto; + if (proto != bpf_htons(ETH_P_IPV6)) + return XDP_DROP; + + if (parse_ipv6_gue(&info, data, data_end)) + return XDP_DROP; + + if (info.proto == IPPROTO_ICMPV6) + return XDP_PASS; + + if (info.proto != IPPROTO_TCP && info.proto != IPPROTO_UDP) + return XDP_DROP; + + filter_src_dst_ip(&info, &match_info); + + transport_hdr = get_transport_hdr(info.trans_hdr_offset, data, + data_end); + if (!transport_hdr) + return XDP_DROP; + + filter_res = filter_transport_hdr(transport_hdr, data_end, + &info, &match_info); + if (!filter_res) + return XDP_DROP; + + if (match_info.is_tcp && !match_info.is_tcp_syn) + return XDP_PASS; + + return XDP_DROP; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py index 4fed2dc25c0a..1c2408ee1f5d 100644 --- a/tools/testing/selftests/bpf/test_bpftool.py +++ b/tools/testing/selftests/bpf/test_bpftool.py @@ -57,6 +57,11 @@ def default_iface(f): return f(*args, iface, **kwargs) return wrapper +DMESG_EMITTING_HELPERS = [ + "bpf_probe_write_user", + "bpf_trace_printk", + "bpf_trace_vprintk", + ] class TestBpftool(unittest.TestCase): @classmethod @@ -67,10 +72,7 @@ class TestBpftool(unittest.TestCase): @default_iface def test_feature_dev_json(self, iface): - unexpected_helpers = [ - "bpf_probe_write_user", - "bpf_trace_printk", - ] + unexpected_helpers = DMESG_EMITTING_HELPERS expected_keys = [ "syscall_config", "program_types", @@ -94,10 +96,7 @@ class TestBpftool(unittest.TestCase): bpftool_json(["feature", "probe"]), bpftool_json(["feature"]), ] - unexpected_helpers = [ - "bpf_probe_write_user", - "bpf_trace_printk", - ] + unexpected_helpers = DMESG_EMITTING_HELPERS expected_keys = [ "syscall_config", "system_config", @@ -121,10 +120,7 @@ class TestBpftool(unittest.TestCase): bpftool_json(["feature", "probe", "kernel", "full"]), bpftool_json(["feature", "probe", "full"]), ] - expected_helpers = [ - "bpf_probe_write_user", - "bpf_trace_printk", - ] + expected_helpers = DMESG_EMITTING_HELPERS for tc in test_cases: # Check if expected helpers are included at least once in any @@ -157,7 +153,7 @@ class TestBpftool(unittest.TestCase): not_full_set.add(helper) self.assertCountEqual(full_set - not_full_set, - {"bpf_probe_write_user", "bpf_trace_printk"}) + set(DMESG_EMITTING_HELPERS)) self.assertCountEqual(not_full_set - full_set, set()) def test_feature_macros(self): diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index b03a87571592..1453a53ed547 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -90,6 +90,10 @@ echo -e "... through kbuild\n" if [ -f ".config" ] ; then make_and_clean tools/bpf + ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for + ## runqslower, but the default (used for the "clean" target) is .output. + ## Let's make sure we clean runqslower's directory properly. + make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed ## down from toplevel Makefile to bpftool's Makefile. diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index e2394eea4b7f..32c7a57867da 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -69,4 +69,7 @@ #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) +#define BTF_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) + #endif /* _TEST_BTF_H */ diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 174b72a64a4c..dbd91221727d 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -26,22 +26,22 @@ if [[ -z $(ip netns identify $$) ]]; then type flow_dissector if ! unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Unexpected unsuccessful attach in namespace" >&2 err=1 fi - $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \ flow_dissector if unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Unexpected successful attach in namespace" >&2 err=1 fi if ! $bpftool prog detach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Failed to detach flow dissector" >&2 err=1 fi @@ -95,7 +95,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s flow_dissector +./flow_dissector_load -p bpf_flow.o -s _dissect # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index cc1cd240445d..c65986bd9d07 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -12,6 +12,11 @@ #include <string.h> #include <execinfo.h> /* backtrace */ #include <linux/membarrier.h> +#include <sys/sysinfo.h> /* get_nprocs */ +#include <netinet/in.h> +#include <sys/select.h> +#include <sys/socket.h> +#include <sys/un.h> /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) @@ -45,9 +50,12 @@ struct prog_test_def { const char *test_name; int test_num; void (*run_test)(void); + void (*run_serial_test)(void); bool force_log; int error_cnt; int skip_cnt; + int sub_succ_cnt; + bool should_run; bool tested; bool need_cgroup_cleanup; @@ -97,6 +105,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) if (stdout == env.stdout) return; + /* worker always holds log */ + if (env.worker_id != -1) + return; + fflush(stdout); /* exports env.log_buf & env.log_cnt */ if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { @@ -107,8 +119,6 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) fprintf(env.stdout, "\n"); } } - - fseeko(stdout, 0, SEEK_SET); /* rewind */ } static void skip_account(void) @@ -124,7 +134,8 @@ static void stdio_restore(void); /* A bunch of tests set custom affinity per-thread and/or per-process. Reset * it after each test/sub-test. */ -static void reset_affinity() { +static void reset_affinity(void) +{ cpu_set_t cpuset; int i, err; @@ -165,21 +176,21 @@ static void restore_netns(void) } } -void test__end_subtest() +void test__end_subtest(void) { struct prog_test_def *test = env.test; int sub_error_cnt = test->error_cnt - test->old_error_cnt; dump_test_log(test, sub_error_cnt); - fprintf(env.stdout, "#%d/%d %s/%s:%s\n", + fprintf(stdout, "#%d/%d %s/%s:%s\n", test->test_num, test->subtest_num, test->test_name, test->subtest_name, sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); if (sub_error_cnt) - env.fail_cnt++; + test->error_cnt++; else if (test->skip_cnt == 0) - env.sub_succ_cnt++; + test->sub_succ_cnt++; skip_account(); free(test->subtest_name); @@ -217,7 +228,8 @@ bool test__start_subtest(const char *name) return true; } -void test__force_log() { +void test__force_log(void) +{ env.test->force_log = true; } @@ -370,7 +382,7 @@ int extract_build_id(char *build_id, size_t size) if (getline(&line, &len, fp) == -1) goto err; - fclose(fp); + pclose(fp); if (len > size) len = size; @@ -379,7 +391,7 @@ int extract_build_id(char *build_id, size_t size) free(line); return 0; err: - fclose(fp); + pclose(fp); return -1; } @@ -446,14 +458,17 @@ static int load_bpf_testmod(void) } /* extern declarations for test funcs */ -#define DEFINE_TEST(name) extern void test_##name(void); +#define DEFINE_TEST(name) \ + extern void test_##name(void) __weak; \ + extern void serial_test_##name(void) __weak; #include <prog_tests/tests.h> #undef DEFINE_TEST static struct prog_test_def prog_test_defs[] = { -#define DEFINE_TEST(name) { \ - .test_name = #name, \ - .run_test = &test_##name, \ +#define DEFINE_TEST(name) { \ + .test_name = #name, \ + .run_test = &test_##name, \ + .run_serial_test = &serial_test_##name, \ }, #include <prog_tests/tests.h> #undef DEFINE_TEST @@ -474,6 +489,8 @@ enum ARG_KEYS { ARG_LIST_TEST_NAMES = 'l', ARG_TEST_NAME_GLOB_ALLOWLIST = 'a', ARG_TEST_NAME_GLOB_DENYLIST = 'd', + ARG_NUM_WORKERS = 'j', + ARG_DEBUG = -1, }; static const struct argp_option opts[] = { @@ -495,6 +512,10 @@ static const struct argp_option opts[] = { "Run tests with name matching the pattern (supports '*' wildcard)." }, { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0, "Don't run tests with name matching the pattern (supports '*' wildcard)." }, + { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL, + "Number of workers to run in parallel, default to number of cpus." }, + { "debug", ARG_DEBUG, NULL, 0, + "print extra debug information for test_progs." }, {}, }; @@ -650,7 +671,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", errno); - return -1; + return -EINVAL; } } @@ -661,6 +682,20 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_LIST_TEST_NAMES: env->list_test_names = true; break; + case ARG_NUM_WORKERS: + if (arg) { + env->workers = atoi(arg); + if (!env->workers) { + fprintf(stderr, "Invalid number of worker: %s.", arg); + return -EINVAL; + } + } else { + env->workers = get_nprocs(); + } + break; + case ARG_DEBUG: + env->debug = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -678,7 +713,7 @@ static void stdio_hijack(void) env.stdout = stdout; env.stderr = stderr; - if (env.verbosity > VERBOSE_NONE) { + if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) { /* nothing to do, output to stdout by default */ return; } @@ -704,10 +739,6 @@ static void stdio_restore(void) return; fclose(stdout); - free(env.log_buf); - - env.log_buf = NULL; - env.log_cnt = 0; stdout = env.stdout; stderr = env.stderr; @@ -743,6 +774,45 @@ int cd_flavor_subdir(const char *exec_name) return chdir(flavor); } +int trigger_module_test_read(int read_sz) +{ + int fd, err; + + fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); + err = -errno; + if (!ASSERT_GE(fd, 0, "testmod_file_open")) + return err; + + read(fd, NULL, read_sz); + close(fd); + + return 0; +} + +int trigger_module_test_write(int write_sz) +{ + int fd, err; + char *buf = malloc(write_sz); + + if (!buf) + return -ENOMEM; + + memset(buf, 'a', write_sz); + buf[write_sz-1] = '\0'; + + fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY); + err = -errno; + if (!ASSERT_GE(fd, 0, "testmod_file_open")) { + free(buf); + return err; + } + + write(fd, buf, write_sz); + close(fd); + free(buf); + return 0; +} + #define MAX_BACKTRACE_SZ 128 void crash_handler(int signum) { @@ -755,11 +825,498 @@ void crash_handler(int signum) dump_test_log(env.test, true); if (env.stdout) stdio_restore(); - + if (env.worker_id != -1) + fprintf(stderr, "[%d]: ", env.worker_id); fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +static void sigint_handler(int signum) +{ + int i; + + for (i = 0; i < env.workers; i++) + if (env.worker_socks[i] > 0) + close(env.worker_socks[i]); +} + +static int current_test_idx; +static pthread_mutex_t current_test_lock; +static pthread_mutex_t stdout_output_lock; + +struct test_result { + int error_cnt; + int skip_cnt; + int sub_succ_cnt; + + size_t log_cnt; + char *log_buf; +}; + +static struct test_result test_results[ARRAY_SIZE(prog_test_defs)]; + +static inline const char *str_msg(const struct msg *msg, char *buf) +{ + switch (msg->type) { + case MSG_DO_TEST: + sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num); + break; + case MSG_TEST_DONE: + sprintf(buf, "MSG_TEST_DONE %d (log: %d)", + msg->test_done.test_num, + msg->test_done.have_log); + break; + case MSG_TEST_LOG: + sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", + strlen(msg->test_log.log_buf), + msg->test_log.is_last); + break; + case MSG_EXIT: + sprintf(buf, "MSG_EXIT"); + break; + default: + sprintf(buf, "UNKNOWN"); + break; + } + + return buf; +} + +static int send_message(int sock, const struct msg *msg) +{ + char buf[256]; + + if (env.debug) + fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf)); + return send(sock, msg, sizeof(*msg), 0); +} + +static int recv_message(int sock, struct msg *msg) +{ + int ret; + char buf[256]; + + memset(msg, 0, sizeof(*msg)); + ret = recv(sock, msg, sizeof(*msg), 0); + if (ret >= 0) { + if (env.debug) + fprintf(stderr, "Received msg: %s\n", str_msg(msg, buf)); + } + return ret; +} + +static void run_one_test(int test_num) +{ + struct prog_test_def *test = &prog_test_defs[test_num]; + + env.test = test; + + if (test->run_test) + test->run_test(); + else if (test->run_serial_test) + test->run_serial_test(); + + /* ensure last sub-test is finalized properly */ + if (test->subtest_name) + test__end_subtest(); + + test->tested = true; + + dump_test_log(test, test->error_cnt); + + reset_affinity(); + restore_netns(); + if (test->need_cgroup_cleanup) + cleanup_cgroup_environment(); +} + +struct dispatch_data { + int worker_id; + int sock_fd; +}; + +static void *dispatch_thread(void *ctx) +{ + struct dispatch_data *data = ctx; + int sock_fd; + FILE *log_fd = NULL; + + sock_fd = data->sock_fd; + + while (true) { + int test_to_run = -1; + struct prog_test_def *test; + struct test_result *result; + + /* grab a test */ + { + pthread_mutex_lock(¤t_test_lock); + + if (current_test_idx >= prog_test_cnt) { + pthread_mutex_unlock(¤t_test_lock); + goto done; + } + + test = &prog_test_defs[current_test_idx]; + test_to_run = current_test_idx; + current_test_idx++; + + pthread_mutex_unlock(¤t_test_lock); + } + + if (!test->should_run || test->run_serial_test) + continue; + + /* run test through worker */ + { + struct msg msg_do_test; + + msg_do_test.type = MSG_DO_TEST; + msg_do_test.do_test.test_num = test_to_run; + if (send_message(sock_fd, &msg_do_test) < 0) { + perror("Fail to send command"); + goto done; + } + env.worker_current_test[data->worker_id] = test_to_run; + } + + /* wait for test done */ + { + int err; + struct msg msg_test_done; + + err = recv_message(sock_fd, &msg_test_done); + if (err < 0) + goto error; + if (msg_test_done.type != MSG_TEST_DONE) + goto error; + if (test_to_run != msg_test_done.test_done.test_num) + goto error; + + test->tested = true; + result = &test_results[test_to_run]; + + result->error_cnt = msg_test_done.test_done.error_cnt; + result->skip_cnt = msg_test_done.test_done.skip_cnt; + result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + + /* collect all logs */ + if (msg_test_done.test_done.have_log) { + log_fd = open_memstream(&result->log_buf, &result->log_cnt); + if (!log_fd) + goto error; + + while (true) { + struct msg msg_log; + + if (recv_message(sock_fd, &msg_log) < 0) + goto error; + if (msg_log.type != MSG_TEST_LOG) + goto error; + + fprintf(log_fd, "%s", msg_log.test_log.log_buf); + if (msg_log.test_log.is_last) + break; + } + fclose(log_fd); + log_fd = NULL; + } + /* output log */ + { + pthread_mutex_lock(&stdout_output_lock); + + if (result->log_cnt) { + result->log_buf[result->log_cnt] = '\0'; + fprintf(stdout, "%s", result->log_buf); + if (result->log_buf[result->log_cnt - 1] != '\n') + fprintf(stdout, "\n"); + } + + fprintf(stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); + + pthread_mutex_unlock(&stdout_output_lock); + } + + } /* wait for test done */ + } /* while (true) */ +error: + if (env.debug) + fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); + + if (log_fd) + fclose(log_fd); +done: + { + struct msg msg_exit; + + msg_exit.type = MSG_EXIT; + if (send_message(sock_fd, &msg_exit) < 0) { + if (env.debug) + fprintf(stderr, "[%d]: send_message msg_exit: %s.\n", + data->worker_id, strerror(errno)); + } + } + return NULL; +} + +static void print_all_error_logs(void) +{ + int i; + + if (env.fail_cnt) + fprintf(stdout, "\nAll error logs:\n"); + + /* print error logs again */ + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test; + struct test_result *result; + + test = &prog_test_defs[i]; + result = &test_results[i]; + + if (!test->tested || !result->error_cnt) + continue; + + fprintf(stdout, "\n#%d %s:%s\n", + test->test_num, test->test_name, + result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); + + if (result->log_cnt) { + result->log_buf[result->log_cnt] = '\0'; + fprintf(stdout, "%s", result->log_buf); + if (result->log_buf[result->log_cnt - 1] != '\n') + fprintf(stdout, "\n"); + } + } +} + +static int server_main(void) +{ + pthread_t *dispatcher_threads; + struct dispatch_data *data; + struct sigaction sigact_int = { + .sa_handler = sigint_handler, + .sa_flags = SA_RESETHAND, + }; + int i; + + sigaction(SIGINT, &sigact_int, NULL); + + dispatcher_threads = calloc(sizeof(pthread_t), env.workers); + data = calloc(sizeof(struct dispatch_data), env.workers); + + env.worker_current_test = calloc(sizeof(int), env.workers); + for (i = 0; i < env.workers; i++) { + int rc; + + data[i].worker_id = i; + data[i].sock_fd = env.worker_socks[i]; + rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]); + if (rc < 0) { + perror("Failed to launch dispatcher thread"); + exit(EXIT_ERR_SETUP_INFRA); + } + } + + /* wait for all dispatcher to finish */ + for (i = 0; i < env.workers; i++) { + while (true) { + int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL); + + if (!ret) { + break; + } else if (ret == EBUSY) { + if (env.debug) + fprintf(stderr, "Still waiting for thread %d (test %d).\n", + i, env.worker_current_test[i] + 1); + usleep(1000 * 1000); + continue; + } else { + fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret); + break; + } + } + } + free(dispatcher_threads); + free(env.worker_current_test); + free(data); + + /* run serial tests */ + save_netns(); + + for (int i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_result *result = &test_results[i]; + + if (!test->should_run || !test->run_serial_test) + continue; + + stdio_hijack(); + + run_one_test(i); + + stdio_restore(); + if (env.log_buf) { + result->log_cnt = env.log_cnt; + result->log_buf = strdup(env.log_buf); + + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + restore_netns(); + + fprintf(stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + + result->error_cnt = test->error_cnt; + result->skip_cnt = test->skip_cnt; + result->sub_succ_cnt = test->sub_succ_cnt; + } + + /* generate summary */ + fflush(stderr); + fflush(stdout); + + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *current_test; + struct test_result *result; + + current_test = &prog_test_defs[i]; + result = &test_results[i]; + + if (!current_test->tested) + continue; + + env.succ_cnt += result->error_cnt ? 0 : 1; + env.skip_cnt += result->skip_cnt; + if (result->error_cnt) + env.fail_cnt++; + env.sub_succ_cnt += result->sub_succ_cnt; + } + + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + + print_all_error_logs(); + + /* reap all workers */ + for (i = 0; i < env.workers; i++) { + int wstatus, pid; + + pid = waitpid(env.worker_pids[i], &wstatus, 0); + if (pid != env.worker_pids[i]) + perror("Unable to reap worker"); + } + + return 0; +} + +static int worker_main(int sock) +{ + save_netns(); + + while (true) { + /* receive command */ + struct msg msg; + + if (recv_message(sock, &msg) < 0) + goto out; + + switch (msg.type) { + case MSG_EXIT: + if (env.debug) + fprintf(stderr, "[%d]: worker exit.\n", + env.worker_id); + goto out; + case MSG_DO_TEST: { + int test_to_run; + struct prog_test_def *test; + struct msg msg_done; + + test_to_run = msg.do_test.test_num; + test = &prog_test_defs[test_to_run]; + + if (env.debug) + fprintf(stderr, "[%d]: #%d:%s running.\n", + env.worker_id, + test_to_run + 1, + test->test_name); + + stdio_hijack(); + + run_one_test(test_to_run); + + stdio_restore(); + + memset(&msg_done, 0, sizeof(msg_done)); + msg_done.type = MSG_TEST_DONE; + msg_done.test_done.test_num = test_to_run; + msg_done.test_done.error_cnt = test->error_cnt; + msg_done.test_done.skip_cnt = test->skip_cnt; + msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; + msg_done.test_done.have_log = false; + + if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { + if (env.log_cnt) + msg_done.test_done.have_log = true; + } + if (send_message(sock, &msg_done) < 0) { + perror("Fail to send message done"); + goto out; + } + + /* send logs */ + if (msg_done.test_done.have_log) { + char *src; + size_t slen; + + src = env.log_buf; + slen = env.log_cnt; + while (slen) { + struct msg msg_log; + char *dest; + size_t len; + + memset(&msg_log, 0, sizeof(msg_log)); + msg_log.type = MSG_TEST_LOG; + dest = msg_log.test_log.log_buf; + len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; + memcpy(dest, src, len); + + src += len; + slen -= len; + if (!slen) + msg_log.test_log.is_last = true; + + assert(send_message(sock, &msg_log) >= 0); + } + } + if (env.log_buf) { + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + if (env.debug) + fprintf(stderr, "[%d]: #%d:%s done.\n", + env.worker_id, + test_to_run + 1, + test->test_name); + break; + } /* case MSG_DO_TEST */ + default: + if (env.debug) + fprintf(stderr, "[%d]: unknown message.\n", env.worker_id); + return -1; + } + } +out: + return 0; +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -770,7 +1327,7 @@ int main(int argc, char **argv) struct sigaction sigact = { .sa_handler = crash_handler, .sa_flags = SA_RESETHAND, - }; + }; int err, i; sigaction(SIGSEGV, &sigact, NULL); @@ -798,21 +1355,84 @@ int main(int argc, char **argv) return -1; } - save_netns(); - stdio_hijack(); + env.stdout = stdout; + env.stderr = stderr; + env.has_testmod = true; if (!env.list_test_names && load_bpf_testmod()) { fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); env.has_testmod = false; } + + /* initializing tests */ for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - env.test = test; test->test_num = i + 1; - - if (!should_run(&env.test_selector, + if (should_run(&env.test_selector, test->test_num, test->test_name)) + test->should_run = true; + else + test->should_run = false; + + if ((test->run_test == NULL && test->run_serial_test == NULL) || + (test->run_test != NULL && test->run_serial_test != NULL)) { + fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n", + test->test_num, test->test_name, test->test_name, test->test_name); + exit(EXIT_ERR_SETUP_INFRA); + } + } + + /* ignore workers if we are just listing */ + if (env.get_test_cnt || env.list_test_names) + env.workers = 0; + + /* launch workers if requested */ + env.worker_id = -1; /* main process */ + if (env.workers) { + env.worker_pids = calloc(sizeof(__pid_t), env.workers); + env.worker_socks = calloc(sizeof(int), env.workers); + if (env.debug) + fprintf(stdout, "Launching %d workers.\n", env.workers); + for (i = 0; i < env.workers; i++) { + int sv[2]; + pid_t pid; + + if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) { + perror("Fail to create worker socket"); + return -1; + } + pid = fork(); + if (pid < 0) { + perror("Failed to fork worker"); + return -1; + } else if (pid != 0) { /* main process */ + close(sv[1]); + env.worker_pids[i] = pid; + env.worker_socks[i] = sv[0]; + } else { /* inside each worker process */ + close(sv[0]); + env.worker_id = i; + return worker_main(sv[1]); + } + } + + if (env.worker_id == -1) { + server_main(); + goto out; + } + } + + /* The rest of the main process */ + + /* on single mode */ + save_netns(); + + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_result *result; + + if (!test->should_run) continue; if (env.get_test_cnt) { @@ -826,33 +1446,35 @@ int main(int argc, char **argv) continue; } - test->run_test(); - /* ensure last sub-test is finalized properly */ - if (test->subtest_name) - test__end_subtest(); + stdio_hijack(); - test->tested = true; + run_one_test(i); - dump_test_log(test, test->error_cnt); + stdio_restore(); fprintf(env.stdout, "#%d %s:%s\n", test->test_num, test->test_name, test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + result = &test_results[i]; + result->error_cnt = test->error_cnt; + if (env.log_buf) { + result->log_buf = strdup(env.log_buf); + result->log_cnt = env.log_cnt; + + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + if (test->error_cnt) env.fail_cnt++; else env.succ_cnt++; - skip_account(); - reset_affinity(); - restore_netns(); - if (test->need_cgroup_cleanup) - cleanup_cgroup_environment(); + skip_account(); + env.sub_succ_cnt += test->sub_succ_cnt; } - if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); - stdio_restore(); if (env.get_test_cnt) { printf("%d\n", env.succ_cnt); @@ -865,14 +1487,18 @@ int main(int argc, char **argv) fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + print_all_error_logs(); + + close(env.saved_netns_fd); out: + if (!env.list_test_names && env.has_testmod) + unload_bpf_testmod(); free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); - close(env.saved_netns_fd); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index c8c2bf878f67..93c1ff705533 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -62,6 +62,7 @@ struct test_env { struct test_selector test_selector; struct test_selector subtest_selector; bool verifier_stats; + bool debug; enum verbosity verbosity; bool jit_enabled; @@ -69,7 +70,8 @@ struct test_env { bool get_test_cnt; bool list_test_names; - struct prog_test_def *test; + struct prog_test_def *test; /* current running tests */ + FILE *stdout; FILE *stderr; char *log_buf; @@ -82,6 +84,38 @@ struct test_env { int skip_cnt; /* skipped tests */ int saved_netns_fd; + int workers; /* number of worker process */ + int worker_id; /* id number of current worker, main process is -1 */ + pid_t *worker_pids; /* array of worker pids */ + int *worker_socks; /* array of worker socks */ + int *worker_current_test; /* array of current running test for each worker */ +}; + +#define MAX_LOG_TRUNK_SIZE 8192 +enum msg_type { + MSG_DO_TEST = 0, + MSG_TEST_DONE = 1, + MSG_TEST_LOG = 2, + MSG_EXIT = 255, +}; +struct msg { + enum msg_type type; + union { + struct { + int test_num; + } do_test; + struct { + int test_num; + int sub_succ_cnt; + int error_cnt; + int skip_cnt; + bool have_log; + } test_done; + struct { + char log_buf[MAX_LOG_TRUNK_SIZE + 1]; + bool is_last; + } test_log; + }; }; extern struct test_env env; @@ -291,6 +325,8 @@ int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); int kern_sync_rcu(void); +int trigger_module_test_read(int read_sz); +int trigger_module_test_write(int write_sz); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" @@ -299,3 +335,5 @@ int kern_sync_rcu(void); #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif + +#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index eefd445b96fc..1ba7e7346afb 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -139,6 +139,7 @@ struct sockmap_options { bool sendpage; bool data_test; bool drop_expected; + bool check_recved_len; int iov_count; int iov_length; int rate; @@ -556,8 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, int err, i, flags = MSG_NOSIGNAL; bool drop = opt->drop_expected; bool data = opt->data_test; + int iov_alloc_length = iov_length; - err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); + if (!tx && opt->check_recved_len) + iov_alloc_length *= 2; + + err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx); if (err) goto out_errno; if (peek_flag) { @@ -665,6 +670,13 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; + if (opt->check_recved_len && s->bytes_recvd > total_bytes) { + errno = EMSGSIZE; + fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n", + s->bytes_recvd, total_bytes); + goto out_errno; + } + if (data) { int chunk_sz = opt->sendpage ? iov_length * cnt : @@ -744,7 +756,8 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - iov_buf -= (txmsg_pop - txmsg_start_pop + 1); + if (txmsg_pop || txmsg_start_pop) + iov_buf -= (txmsg_pop - txmsg_start_pop + 1); if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); @@ -1680,12 +1693,27 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; skb_use_parser = 512; + if (ktls == 1) + skb_use_parser = 570; opt->iov_length = 256; opt->iov_count = 1; opt->rate = 2; test_exec(cgrp, opt); } +static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt) +{ + if (ktls == 1) + return; + skb_use_parser = 10; + opt->iov_length = 20; + opt->iov_count = 1; + opt->rate = 1; + opt->check_recved_len = true; + test_exec(cgrp, opt); + opt->check_recved_len = false; +} + char *map_names[] = { "sock_map", "sock_map_txmsg", @@ -1784,7 +1812,8 @@ struct _test test[] = { {"txmsg test pull-data", test_txmsg_pull}, {"txmsg test pop-data", test_txmsg_pop}, {"txmsg test push/pop data", test_txmsg_push_pop}, - {"txmsg text ingress parser", test_txmsg_ingress_parser}, + {"txmsg test ingress parser", test_txmsg_ingress_parser}, + {"txmsg test ingress parser2", test_txmsg_ingress_parser2}, }; static int check_whitelist(struct _test *t, struct sockmap_options *opt) diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a20a919244c0..a3bb6d399daa 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -124,7 +124,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:write sysctl:write read ok narrow", .insns = { /* u64 w = (u16)write & 1; */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, write)), #else @@ -184,7 +184,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), #else diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh index 9b3617d770a5..6413c1472554 100755 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -76,8 +76,8 @@ DIR=$(dirname $0) TEST_IF=lo MAX_PING_TRIES=5 BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o" -CLSACT_SECTION="clsact/check_syncookie" -XDP_SECTION="xdp/check_syncookie" +CLSACT_SECTION="tc" +XDP_SECTION="xdp" BPF_PROG_ID=0 PROG="${DIR}/test_tcp_check_syncookie_user" diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 1ccbe804e8e1..ca1372924023 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -168,14 +168,15 @@ add_vxlan_tunnel() ip netns exec at_ns0 \ ip link set dev $DEV_NS address 52:54:00:d9:01:00 up ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 + ip netns exec at_ns0 \ + ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF # root namespace ip link add dev $DEV type $TYPE external gbp dstport 4789 ip link set dev $DEV address 52:54:00:d9:02:00 up ip addr add dev $DEV 10.1.1.200/24 - arp -s 10.1.1.100 52:54:00:d9:01:00 + ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV } add_ip6vxlan_tunnel() diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3a9e332c5e36..25afe423b3f0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -47,6 +47,10 @@ #include "test_btf.h" #include "../../../include/linux/filter.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 @@ -974,7 +978,7 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, if (err) { switch (saved_errno) { - case 524/*ENOTSUPP*/: + case ENOTSUPP: printf("Did not run the program (not supported) "); return 0; case EPERM: @@ -1119,6 +1123,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto close_fds; } + if (fd_prog < 0 && saved_errno == ENOTSUPP) { + printf("SKIP (program uses an unsupported feature)\n"); + skips++; + goto close_fds; + } + alignment_prevented_execution = 0; if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 637fcf4fe4e3..d10cefd6eb09 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -1,5 +1,8 @@ #!/bin/sh +# Kselftest framework requirement - SKIP code is 4. +readonly KSFT_SKIP=4 + cleanup() { if [ "$?" = "0" ]; then @@ -17,7 +20,7 @@ cleanup() ip link set dev lo xdp off 2>/dev/null > /dev/null if [ $? -ne 0 ];then echo "selftests: [SKIP] Could not run test without the ip xdp support" - exit 0 + exit $KSFT_SKIP fi set -e diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index c033850886f4..57c8db9972a6 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -52,8 +52,8 @@ test_xdp_redirect() return 0 fi - ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null - ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null + ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null + ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 1538373157e3..351955c2bdfd 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -88,7 +88,7 @@ setup_ns() # Add a neigh entry for IPv4 ping test ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 ip -n ns$i link set veth0 $mode obj \ - xdp_dummy.o sec xdp_dummy &> /dev/null || \ + xdp_dummy.o sec xdp &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}') diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index 995278e684b6..a3a1eaee26ea 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -107,9 +107,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 -ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy +ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp -ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy +ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp trap cleanup EXIT diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index bb8b0da91686..0cbc7604a2f8 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 # Author: Jesper Dangaard Brouer <hawk@kernel.org> +# Kselftest framework requirement - SKIP code is 4. +readonly KSFT_SKIP=4 + # Allow wrapper scripts to name test if [ -z "$TESTNAME" ]; then TESTNAME=xdp_vlan @@ -94,7 +97,7 @@ while true; do -h | --help ) usage; echo "selftests: $TESTNAME [SKIP] usage help info requested" - exit 0 + exit $KSFT_SKIP ;; * ) shift @@ -117,7 +120,7 @@ fi ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null if [ $? -ne 0 ]; then echo "selftests: $TESTNAME [SKIP] need ip xdp support" - exit 0 + exit $KSFT_SKIP fi # Interactive mode likely require us to cleanup netns diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index e7a19b04d4ea..7b7f918eda77 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1b1c798e9248..1b138cd2b187 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -186,7 +186,7 @@ }, .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", + .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 6e52dfc64415..c22dc83a41fd 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -119,3 +119,41 @@ }, .result = ACCEPT, }, +{ + "Dest pointer in r0 - fail", + .insns = { + /* val = 0; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, 1); */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (r0 != 0) exit(1); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Dest pointer in r0 - succeed", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r0 = atomic_cmpxchg(&val, r0, 0); */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c new file mode 100644 index 000000000000..3bc9ff7a860b --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c @@ -0,0 +1,57 @@ +#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \ + { \ + "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \ + .insns = { \ + /* u64 val = operan1; */ \ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, operand1), \ + /* u64 old = atomic_fetch_add(&val, operand2); */ \ + BPF_MOV64_REG(dst_reg, BPF_REG_10), \ + BPF_MOV64_IMM(src_reg, operand2), \ + BPF_ATOMIC_OP(BPF_DW, op, \ + dst_reg, src_reg, -8), \ + /* if (old != operand1) exit(1); */ \ + BPF_JMP_IMM(BPF_JEQ, src_reg, operand1, 2), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + /* if (val != result) exit (2); */ \ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), \ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, expect, 2), \ + BPF_MOV64_IMM(BPF_REG_0, 2), \ + BPF_EXIT_INSN(), \ + /* exit(0); */ \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_EXIT_INSN(), \ + }, \ + .result = ACCEPT, \ + } +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XCHG, 0x011, 0x011), +#undef __ATOMIC_FETCH_OP_TEST diff --git a/tools/testing/selftests/bpf/verifier/atomic_invalid.c b/tools/testing/selftests/bpf/verifier/atomic_invalid.c new file mode 100644 index 000000000000..39272720b2f6 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c @@ -0,0 +1,25 @@ +#define __INVALID_ATOMIC_ACCESS_TEST(op) \ + { \ + "atomic " #op " access through non-pointer ", \ + .insns = { \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_MOV64_IMM(BPF_REG_1, 0), \ + BPF_ATOMIC_OP(BPF_DW, op, BPF_REG_1, BPF_REG_0, -8), \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_EXIT_INSN(), \ + }, \ + .result = REJECT, \ + .errstr = "R1 invalid mem access 'inv'" \ + } +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_AND), +__INVALID_ATOMIC_ACCESS_TEST(BPF_AND | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_OR), +__INVALID_ATOMIC_ACCESS_TEST(BPF_OR | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XCHG), +__INVALID_ATOMIC_ACCESS_TEST(BPF_CMPXCHG), diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 336a749673d1..d7b74eb28333 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1,4 +1,27 @@ { + "calls: invalid kfunc call not eliminated", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = REJECT, + .errstr = "invalid kernel function call not eliminated in verifier pass", +}, +{ + "calls: invalid kfunc call unreachable", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 2022c0f2cd75..83cecfbd6739 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -502,7 +502,7 @@ "check skb->hash byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -537,7 +537,7 @@ "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -646,7 +646,7 @@ "check skb->hash half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -661,7 +661,7 @@ "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), #else @@ -676,7 +676,7 @@ "check skb->hash half load not permitted, unaligned 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), #else @@ -693,7 +693,7 @@ "check skb->hash half load not permitted, unaligned 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -951,7 +951,7 @@ "check skb->data half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, data)), #else @@ -1058,6 +1058,66 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "padding after gso_size is not accessible", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetofend(struct __sk_buff, gso_size)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=180 size=4", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "read hwtstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hwtstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read hwtstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, hwtstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "write hwtstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, hwtstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=184 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read hwtstamp from CLS", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hwtstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ "check wire_len is not readable by sockets", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index df215e004566..79021c30e51e 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -62,6 +62,11 @@ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), @@ -73,11 +78,69 @@ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL), + BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), - BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), - BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jit: various div tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL), + BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0xbeefULL), + BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0x2bULL), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), BPF_MOV64_IMM(BPF_REG_0, 2), diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c index 2cab6a3966bb..5c8944d0b091 100644 --- a/tools/testing/selftests/bpf/verifier/lwt.c +++ b/tools/testing/selftests/bpf/verifier/lwt.c @@ -174,7 +174,7 @@ "check skb->tc_classid half load not permitted for lwt prog", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, tc_classid)), #else diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c index 471c1a5950d8..d8a9b1a1f9a2 100644 --- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c +++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c @@ -2,7 +2,7 @@ "check bpf_perf_event_data->sample_period byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -18,7 +18,7 @@ "check bpf_perf_event_data->sample_period half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -34,7 +34,7 @@ "check bpf_perf_event_data->sample_period word load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 0b943897aaf6..c9991c3f3bd2 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -104,3 +104,164 @@ .result = ACCEPT, .retval = POINTER_VALUE, }, +{ + "Spill and refill a u32 const scalar. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r4 = 20 */ + BPF_MOV32_IMM(BPF_REG_4, 20), + /* *(u32 *)(r10 -8) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* r4 = *(u32 *)(r10 -8) */ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv20 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=inv20 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=inv20 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "Spill a u32 const, refill from another half of the uninit u32 from the stack", + .insns = { + /* r4 = 20 */ + BPF_MOV32_IMM(BPF_REG_4, 20), + /* *(u32 *)(r10 -8) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid read from stack off -4+0 size 4", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "Spill a u32 const scalar. Refill as u16. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r4 = 20 */ + BPF_MOV32_IMM(BPF_REG_4, 20), + /* *(u32 *)(r10 -8) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* r4 = *(u16 *)(r10 -8) */ + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r4 = 20 */ + BPF_MOV32_IMM(BPF_REG_4, 20), + /* *(u32 *)(r10 -8) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* r4 = *(u16 *)(r10 -6) */ + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -6), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r4 = 20 */ + BPF_MOV32_IMM(BPF_REG_4, 20), + /* *(u32 *)(r10 -8) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* *(u32 *)(r10 -4) = r4 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4), + /* r4 = *(u32 *)(r10 -4), */ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=U32_MAX */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "Spill and refill a umax=40 bounded scalar. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_JMP_IMM(BPF_JLE, BPF_REG_4, 40, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* *(u32 *)(r10 -8) = r4 R4=inv,umax=40 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8), + /* r4 = (*u32 *)(r10 - 8) */ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8), + /* r2 += r4 R2=pkt R4=inv,umax=40 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_4), + /* r0 = r2 R2=pkt,umax=40 R4=inv,umax=40 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 20), + /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 1), + /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 8889b3f55236..027198768fad 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -224,10 +224,10 @@ EOF -nodefaults \ -display none \ -serial mon:stdio \ - -cpu kvm64 \ + -cpu host \ -enable-kvm \ - -smp 4 \ - -m 2G \ + -smp 8 \ + -m 4G \ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ -kernel "${kernel_bzimage}" \ -append "root=/dev/vda rw console=ttyS0,115200" diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index 842d9155d36c..30f12637f4e4 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -178,9 +178,8 @@ int main(int argc, char **argv) return 1; } - main_prog = bpf_object__find_program_by_title(obj, - server ? "xdpserver" : - "xdpclient"); + main_prog = bpf_object__find_program_by_name(obj, + server ? "xdping_server" : "xdping_client"); if (main_prog) prog_fd = bpf_program__fd(main_prog); if (!main_prog || prog_fd < 0) { @@ -188,7 +187,7 @@ int main(int argc, char **argv) return 1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (map) map_fd = bpf_map__fd(map); if (!map || map_fd < 0) { diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index f53ce2683f8d..6c7cf8aadc79 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -19,7 +19,7 @@ * Virtual Ethernet interfaces. * * For each mode, the following tests are run: - * a. nopoll - soft-irq processing + * a. nopoll - soft-irq processing in run-to-completion mode * b. poll - using poll() syscall * c. Socket Teardown * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy @@ -45,6 +45,10 @@ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0, * then remove xsk sockets from queue 0 on both veth interfaces and * finally run a traffic on queues ids 1 + * g. unaligned mode + * h. tests for invalid and corner case Tx descriptors so that the correct ones + * are discarded and let through, respectively. + * i. 2K frame size tests * * Total tests: 12 * @@ -112,13 +116,10 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) -#define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\ - test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\ - test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\ - test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\ - test_type == TEST_TYPE_STATS ? "Stats" : "",\ - test_type == TEST_TYPE_BPF_RES ? "BPF RES" : "")) +#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV" + +#define print_ksft_result(test) \ + (ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name)) static void memset32_htonl(void *dest, u32 val, u32 size) { @@ -235,80 +236,46 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); } -static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx) +static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) { struct xsk_umem_config cfg = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, - .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .frame_headroom = frame_headroom, + .frame_size = umem->frame_size, + .frame_headroom = umem->frame_headroom, .flags = XSK_UMEM__DEFAULT_FLAGS }; - struct xsk_umem_info *umem; int ret; - umem = calloc(1, sizeof(struct xsk_umem_info)); - if (!umem) - exit_with_error(errno); + if (umem->unaligned_mode) + cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, &cfg); if (ret) - exit_with_error(-ret); + return ret; umem->buffer = buffer; - - data->umem_arr[idx] = umem; -} - -static void xsk_populate_fill_ring(struct xsk_umem_info *umem) -{ - int ret, i; - u32 idx = 0; - - ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); - if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) - exit_with_error(-ret); - for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE; - xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); + return 0; } -static int xsk_configure_socket(struct ifobject *ifobject, int idx) +static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, + struct ifobject *ifobject, u32 qid) { struct xsk_socket_config cfg; - struct xsk_socket_info *xsk; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; - int ret; - - xsk = calloc(1, sizeof(struct xsk_socket_info)); - if (!xsk) - exit_with_error(errno); - xsk->umem = ifobject->umem; - cfg.rx_size = rxqsize; + xsk->umem = umem; + cfg.rx_size = xsk->rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; cfg.libbpf_flags = 0; - cfg.xdp_flags = xdp_flags; - cfg.bind_flags = xdp_bind_flags; - - if (test_type != TEST_TYPE_BIDI) { - rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL; - txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL; - } else { - rxr = &xsk->rx; - txr = &xsk->tx; - } - - ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx, - ifobject->umem->umem, rxr, txr, &cfg); - if (ret) - return 1; + cfg.xdp_flags = ifobject->xdp_flags; + cfg.bind_flags = ifobject->bind_flags; - ifobject->xsk_arr[idx] = xsk; - - return 0; + txr = ifobject->tx_on ? &xsk->tx : NULL; + rxr = ifobject->rx_on ? &xsk->rx : NULL; + return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg); } static struct option long_options[] = { @@ -354,45 +321,44 @@ static int switch_namespace(const char *nsname) return nsfd; } -static int validate_interfaces(void) +static bool validate_interface(struct ifobject *ifobj) { - bool ret = true; - - for (int i = 0; i < MAX_INTERFACES; i++) { - if (!strcmp(ifdict[i]->ifname, "")) { - ret = false; - ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>."); - } - } - return ret; + if (!strcmp(ifobj->ifname, "")) + return false; + return true; } -static void parse_command_line(int argc, char **argv) +static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc, + char **argv) { - int option_index, interface_index = 0, c; + struct ifobject *ifobj; + u32 interface_nb = 0; + int option_index, c; opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:Dv", long_options, &option_index); + char *sptr, *token; + c = getopt_long(argc, argv, "i:Dv", long_options, &option_index); if (c == -1) break; switch (c) { case 'i': - if (interface_index == MAX_INTERFACES) + if (interface_nb == 0) + ifobj = ifobj_tx; + else if (interface_nb == 1) + ifobj = ifobj_rx; + else break; - char *sptr, *token; sptr = strndupa(optarg, strlen(optarg)); - memcpy(ifdict[interface_index]->ifname, - strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); + memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); token = strsep(&sptr, ","); if (token) - memcpy(ifdict[interface_index]->nsname, token, - MAX_INTERFACES_NAMESPACE_CHARS); - interface_index++; + memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS); + interface_nb++; break; case 'D': opt_pkt_dump = true; @@ -405,11 +371,85 @@ static void parse_command_line(int argc, char **argv) ksft_exit_xfail(); } } +} - if (!validate_interfaces()) { - usage(basename(argv[0])); - ksft_exit_xfail(); +static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx) +{ + u32 i, j; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; + + ifobj->umem = &ifobj->umem_arr[0]; + ifobj->xsk = &ifobj->xsk_arr[0]; + ifobj->use_poll = false; + ifobj->pacing_on = true; + ifobj->pkt_stream = test->pkt_stream_default; + + if (i == 0) { + ifobj->rx_on = false; + ifobj->tx_on = true; + } else { + ifobj->rx_on = true; + ifobj->tx_on = false; + } + + for (j = 0; j < MAX_SOCKETS; j++) { + memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j])); + memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); + ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + } + } + + test->ifobj_tx = ifobj_tx; + test->ifobj_rx = ifobj_rx; + test->current_step = 0; + test->total_steps = 1; + test->nb_sockets = 1; +} + +static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx, enum test_mode mode) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = test->pkt_stream_default; + memset(test, 0, sizeof(*test)); + test->pkt_stream_default = pkt_stream; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; + + ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + if (mode == TEST_MODE_SKB) + ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE; + else + ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE; + + ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; } + + __test_spec_init(test, ifobj_tx, ifobj_rx); +} + +static void test_spec_reset(struct test_spec *test) +{ + __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); +} + +static void test_spec_set_name(struct test_spec *test, const char *name) +{ + strncpy(test->name, name, MAX_TEST_NAME_SIZE); +} + +static void pkt_stream_reset(struct pkt_stream *pkt_stream) +{ + if (pkt_stream) + pkt_stream->rx_pkt_nb = 0; } static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) @@ -420,29 +460,104 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) return &pkt_stream->pkts[pkt_nb]; } -static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) +static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream) +{ + while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; + pkt_stream->rx_pkt_nb++; + } + return NULL; +} + +static void pkt_stream_delete(struct pkt_stream *pkt_stream) +{ + free(pkt_stream->pkts); + free(pkt_stream); +} + +static void pkt_stream_restore_default(struct test_spec *test) +{ + if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) { + pkt_stream_delete(test->ifobj_tx->pkt_stream); + test->ifobj_tx->pkt_stream = test->pkt_stream_default; + } + test->ifobj_rx->pkt_stream = test->pkt_stream_default; +} + +static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) { struct pkt_stream *pkt_stream; - u32 i; - pkt_stream = malloc(sizeof(*pkt_stream)); + pkt_stream = calloc(1, sizeof(*pkt_stream)); if (!pkt_stream) - exit_with_error(ENOMEM); + return NULL; pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); - if (!pkt_stream->pkts) + if (!pkt_stream->pkts) { + free(pkt_stream); + return NULL; + } + + pkt_stream->nb_pkts = nb_pkts; + return pkt_stream; +} + +static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = __pkt_stream_alloc(nb_pkts); + if (!pkt_stream) exit_with_error(ENOMEM); pkt_stream->nb_pkts = nb_pkts; for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE; + pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size; pkt_stream->pkts[i].len = pkt_len; pkt_stream->pkts[i].payload = i; + + if (pkt_len > umem->frame_size) + pkt_stream->pkts[i].valid = false; + else + pkt_stream->pkts[i].valid = true; } return pkt_stream; } +static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, + struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); +} + +static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); + test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_rx->pkt_stream = pkt_stream; +} + +static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default); + for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) { + pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset; + pkt_stream->pkts[i].len = pkt_len; + } + + test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_rx->pkt_stream = pkt_stream; +} + static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) { struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); @@ -453,6 +568,8 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) if (!pkt) return NULL; + if (!pkt->valid || pkt->len < PKT_SIZE) + return pkt; data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); @@ -467,6 +584,26 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) return pkt; } +static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = __pkt_stream_alloc(nb_pkts); + if (!pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_rx->pkt_stream = pkt_stream; + + for (i = 0; i < nb_pkts; i++) { + pkt_stream->pkts[i].addr = pkts[i].addr; + pkt_stream->pkts[i].len = pkts[i].len; + pkt_stream->pkts[i].payload = i; + pkt_stream->pkts[i].valid = pkts[i].valid; + } +} + static void pkt_dump(void *pkt, u32 len) { char s[INET_ADDRSTRLEN]; @@ -504,9 +641,28 @@ static void pkt_dump(void *pkt, u32 len) fprintf(stdout, "---------------------------------------\n"); } -static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc) +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, + u64 pkt_stream_addr) { - void *data = xsk_umem__get_data(buffer, desc->addr); + u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; + u32 offset = addr % umem->frame_size, expected_offset = 0; + + if (!pkt_stream->use_addr_for_fill) + pkt_stream_addr = 0; + + expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + + if (offset == expected_offset) + return true; + + ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset, + offset); + return false; +} + +static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) +{ + void *data = xsk_umem__get_data(buffer, addr); struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); if (!pkt) { @@ -514,19 +670,24 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *d return false; } + if (len < PKT_SIZE) { + /*Do not try to verify packets that are smaller than minimum size. */ + return true; + } + + if (pkt->len != len) { + ksft_test_result_fail + ("ERROR: [%s] expected length [%d], got length [%d]\n", + __func__, pkt->len, len); + return false; + } + if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); - if (opt_pkt_dump && test_type != TEST_TYPE_STATS) + if (opt_pkt_dump) pkt_dump(data, PKT_SIZE); - if (pkt->len != desc->len) { - ksft_test_result_fail - ("ERROR: [%s] expected length [%d], got length [%d]\n", - __func__, pkt->len, desc->len); - return false; - } - if (pkt->payload != seqnum) { ksft_test_result_fail ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n", @@ -558,14 +719,20 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) unsigned int rcvd; u32 idx; - if (!xsk->outstanding_tx) - return; - if (xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd) { + if (rcvd > xsk->outstanding_tx) { + u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); + + ksft_test_result_fail("ERROR: [%s] Too many packets completed\n", + __func__); + ksft_print_msg("Last completion address: %llx\n", addr); + return; + } + xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; } @@ -574,15 +741,16 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk, struct pollfd *fds) { - u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0; - struct pkt *pkt; + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + struct xsk_umem_info *umem = xsk->umem; + u32 idx_rx = 0, idx_fq = 0, rcvd, i; + u32 total = 0; int ret; - pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++); while (pkt) { rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(-ret); @@ -590,40 +758,58 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * continue; } - ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(-ret); } - ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); u64 addr = desc->addr, orig; + if (!pkt) { + ksft_test_result_fail("ERROR: [%s] Received too many packets.\n", + __func__); + ksft_print_msg("Last packet has addr: %llx len: %u\n", + addr, desc->len); + return; + } + orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); - if (!is_pkt_valid(pkt, xsk->umem->buffer, desc)) + + if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len)) + return; + if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr)) return; - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; - pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++); + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + pkt = pkt_stream_get_next_rx_pkt(pkt_stream); } - xsk_ring_prod__submit(&xsk->umem->fq, rcvd); + xsk_ring_prod__submit(&umem->fq, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= rcvd; + total += rcvd; + if (pkts_in_flight < umem->num_frames) + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); } } static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) { struct xsk_socket_info *xsk = ifobject->xsk; - u32 i, idx; + u32 i, idx, valid_pkts = 0; while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) complete_pkts(xsk, BATCH_SIZE); @@ -638,15 +824,23 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) tx_desc->addr = pkt->addr; tx_desc->len = pkt->len; pkt_nb++; + if (pkt->valid) + valid_pkts++; } - xsk_ring_prod__submit(&xsk->tx, i); - if (stat_test_type != STAT_TEST_TX_INVALID) - xsk->outstanding_tx += i; - else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight += valid_pkts; + if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) { kick_tx(xsk); + pthread_cond_wait(&pacing_cond, &pacing_mutex); + } + pthread_mutex_unlock(&pacing_mutex); + + xsk_ring_prod__submit(&xsk->tx, i); + xsk->outstanding_tx += valid_pkts; complete_pkts(xsk, i); + usleep(10); return i; } @@ -658,29 +852,25 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static void send_pkts(struct ifobject *ifobject) { - struct pollfd fds[MAX_SOCKS] = { }; + struct pollfd fds = { }; u32 pkt_cnt = 0; - fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); - fds[0].events = POLLOUT; + fds.fd = xsk_socket__fd(ifobject->xsk->xsk); + fds.events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - u32 sent; - - if (test_type == TEST_TYPE_POLL) { + if (ifobject->use_poll) { int ret; - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (ret <= 0) continue; - if (!(fds[0].revents & POLLOUT)) + if (!(fds.revents & POLLOUT)) continue; } - sent = __send_pkts(ifobject, pkt_cnt); - pkt_cnt += sent; - usleep(10); + pkt_cnt += __send_pkts(ifobject, pkt_cnt); } wait_for_tx_completion(ifobject->xsk); @@ -698,7 +888,7 @@ static bool rx_stats_are_valid(struct ifobject *ifobject) optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) { - ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", + ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", __func__, -err, strerror(-err)); return true; } @@ -739,7 +929,7 @@ static void tx_stats_validate(struct ifobject *ifobject) optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) { - ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", + ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", __func__, -err, strerror(-err)); return; } @@ -751,71 +941,62 @@ static void tx_stats_validate(struct ifobject *ifobject) __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); } -static void thread_common_ops(struct ifobject *ifobject, void *bufs) +static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { - u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - size_t mmap_sz = umem_sz; - int ctr = 0; - int ret; + u32 i; ifobject->ns_fd = switch_namespace(ifobject->nsname); - if (test_type == TEST_TYPE_BPF_RES) - mmap_sz *= 2; + if (ifobject->umem->unaligned_mode) + mmap_flags |= MAP_HUGETLB; - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); + for (i = 0; i < test->nb_sockets; i++) { + u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; + u32 ctr = 0; + void *bufs; + int ret; - while (ctr++ < SOCK_RECONF_CTR) { - xsk_configure_umem(ifobject, bufs, umem_sz, 0); - ifobject->umem = ifobject->umem_arr[0]; - ret = xsk_configure_socket(ifobject, 0); - if (!ret) - break; + bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); - /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */ - usleep(USLEEP_MAX); - if (ctr >= SOCK_RECONF_CTR) + ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); + if (ret) exit_with_error(-ret); - } - ifobject->umem = ifobject->umem_arr[0]; - ifobject->xsk = ifobject->xsk_arr[0]; + while (ctr++ < SOCK_RECONF_CTR) { + ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i], + ifobject, i); + if (!ret) + break; - if (test_type == TEST_TYPE_BPF_RES) { - xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1); - ifobject->umem = ifobject->umem_arr[1]; - ret = xsk_configure_socket(ifobject, 1); + /* Retry if it fails as xsk_socket__create() is asynchronous */ + if (ctr >= SOCK_RECONF_CTR) + exit_with_error(-ret); + usleep(USLEEP_MAX); + } } - ifobject->umem = ifobject->umem_arr[0]; - ifobject->xsk = ifobject->xsk_arr[0]; - print_verbose("Interface [%s] vector [%s]\n", - ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx"); -} - -static bool testapp_is_test_two_stepped(void) -{ - return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step; + ifobject->umem = &ifobject->umem_arr[0]; + ifobject->xsk = &ifobject->xsk_arr[0]; } static void testapp_cleanup_xsk_res(struct ifobject *ifobj) { - if (testapp_is_test_two_stepped()) { - xsk_socket__delete(ifobj->xsk->xsk); - (void)xsk_umem__delete(ifobj->umem->umem); - } + print_verbose("Destroying socket\n"); + xsk_socket__delete(ifobj->xsk->xsk); + munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size); + xsk_umem__delete(ifobj->umem->umem); } static void *worker_testapp_validate_tx(void *arg) { - struct ifobject *ifobject = (struct ifobject *)arg; - void *bufs = NULL; + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_tx; - if (!second_step) - thread_common_ops(ifobject, bufs); + if (test->current_step == 1) + thread_common_ops(test, ifobject); print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, ifobject->ifname); @@ -824,24 +1005,55 @@ static void *worker_testapp_validate_tx(void *arg) if (stat_test_type == STAT_TEST_TX_INVALID) tx_stats_validate(ifobject); - testapp_cleanup_xsk_res(ifobject); + if (test->total_steps == test->current_step) + testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } +static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) +{ + u32 idx = 0, i, buffers_to_fill; + int ret; + + if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + buffers_to_fill = umem->num_frames; + else + buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; + + ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); + if (ret != buffers_to_fill) + exit_with_error(ENOSPC); + for (i = 0; i < buffers_to_fill; i++) { + u64 addr; + + if (pkt_stream->use_addr_for_fill) { + struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); + + if (!pkt) + break; + addr = pkt->addr; + } else { + addr = i * umem->frame_size; + } + + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + } + xsk_ring_prod__submit(&umem->fq, buffers_to_fill); +} + static void *worker_testapp_validate_rx(void *arg) { - struct ifobject *ifobject = (struct ifobject *)arg; - struct pollfd fds[MAX_SOCKS] = { }; - void *bufs = NULL; + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_rx; + struct pollfd fds = { }; - if (!second_step) - thread_common_ops(ifobject, bufs); + if (test->current_step == 1) + thread_common_ops(test, ifobject); - if (stat_test_type != STAT_TEST_RX_FILL_EMPTY) - xsk_populate_fill_ring(ifobject->umem); + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); - fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); - fds[0].events = POLLIN; + fds.fd = xsk_socket__fd(ifobject->xsk->xsk); + fds.events = POLLIN; pthread_barrier_wait(&barr); @@ -849,151 +1061,239 @@ static void *worker_testapp_validate_rx(void *arg) while (!rx_stats_are_valid(ifobject)) continue; else - receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds); + receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds); - if (test_type == TEST_TYPE_TEARDOWN) - print_verbose("Destroying socket\n"); - - testapp_cleanup_xsk_res(ifobject); + if (test->total_steps == test->current_step) + testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } -static void testapp_validate(void) +static void testapp_validate_traffic(struct test_spec *test) { - bool bidi = test_type == TEST_TYPE_BIDI; - bool bpf = test_type == TEST_TYPE_BPF_RES; - struct pkt_stream *pkt_stream; + struct ifobject *ifobj_tx = test->ifobj_tx; + struct ifobject *ifobj_rx = test->ifobj_rx; + pthread_t t0, t1; if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); - if (stat_test_type == STAT_TEST_TX_INVALID) - pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE); - else - pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE); - ifdict_tx->pkt_stream = pkt_stream; - ifdict_rx->pkt_stream = pkt_stream; + test->current_step++; + pkt_stream_reset(ifobj_rx->pkt_stream); + pkts_in_flight = 0; /*Spawn RX thread */ - pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx); + pthread_create(&t0, NULL, ifobj_rx->func_ptr, test); pthread_barrier_wait(&barr); if (pthread_barrier_destroy(&barr)) exit_with_error(errno); /*Spawn TX thread */ - pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx); + pthread_create(&t1, NULL, ifobj_tx->func_ptr, test); pthread_join(t1, NULL); pthread_join(t0, NULL); - - if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS)) - print_ksft_result(); } -static void testapp_teardown(void) +static void testapp_teardown(struct test_spec *test) { int i; + test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { - print_verbose("Creating socket\n"); - testapp_validate(); + testapp_validate_traffic(test); + test_spec_reset(test); } - - print_ksft_result(); } -static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2) +static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) { - void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr; - enum fvector tmp_vector = ifobj1->fv.vector; + thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; + struct ifobject *tmp_ifobj = (*ifobj1); - ifobj1->func_ptr = ifobj2->func_ptr; - ifobj1->fv.vector = ifobj2->fv.vector; + (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; + (*ifobj2)->func_ptr = tmp_func_ptr; - ifobj2->func_ptr = tmp_func_ptr; - ifobj2->fv.vector = tmp_vector; - - ifdict_tx = ifobj1; - ifdict_rx = ifobj2; + *ifobj1 = *ifobj2; + *ifobj2 = tmp_ifobj; } -static void testapp_bidi(void) +static void testapp_bidi(struct test_spec *test) { - for (int i = 0; i < MAX_BIDI_ITER; i++) { - print_verbose("Creating socket\n"); - testapp_validate(); - if (!second_step) { - print_verbose("Switching Tx/Rx vectors\n"); - swap_vectors(ifdict[1], ifdict[0]); - } - second_step = true; - } + test_spec_set_name(test, "BIDIRECTIONAL"); + test->ifobj_tx->rx_on = true; + test->ifobj_rx->tx_on = true; + test->total_steps = 2; + testapp_validate_traffic(test); - swap_vectors(ifdict[0], ifdict[1]); + print_verbose("Switching Tx/Rx vectors\n"); + swap_directions(&test->ifobj_rx, &test->ifobj_tx); + testapp_validate_traffic(test); - print_ksft_result(); + swap_directions(&test->ifobj_rx, &test->ifobj_tx); } -static void swap_xsk_res(void) +static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) { - xsk_socket__delete(ifdict_tx->xsk->xsk); - xsk_umem__delete(ifdict_tx->umem->umem); - xsk_socket__delete(ifdict_rx->xsk->xsk); - xsk_umem__delete(ifdict_rx->umem->umem); - ifdict_tx->umem = ifdict_tx->umem_arr[1]; - ifdict_tx->xsk = ifdict_tx->xsk_arr[1]; - ifdict_rx->umem = ifdict_rx->umem_arr[1]; - ifdict_rx->xsk = ifdict_rx->xsk_arr[1]; + xsk_socket__delete(ifobj_tx->xsk->xsk); + xsk_umem__delete(ifobj_tx->umem->umem); + xsk_socket__delete(ifobj_rx->xsk->xsk); + xsk_umem__delete(ifobj_rx->umem->umem); + ifobj_tx->umem = &ifobj_tx->umem_arr[1]; + ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; + ifobj_rx->umem = &ifobj_rx->umem_arr[1]; + ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; } -static void testapp_bpf_res(void) +static void testapp_bpf_res(struct test_spec *test) { - int i; + test_spec_set_name(test, "BPF_RES"); + test->total_steps = 2; + test->nb_sockets = 2; + testapp_validate_traffic(test); - for (i = 0; i < MAX_BPF_ITER; i++) { - print_verbose("Creating socket\n"); - testapp_validate(); - if (!second_step) - swap_xsk_res(); - second_step = true; - } + swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); + testapp_validate_traffic(test); +} - print_ksft_result(); +static void testapp_headroom(struct test_spec *test) +{ + test_spec_set_name(test, "UMEM_HEADROOM"); + test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; + testapp_validate_traffic(test); } -static void testapp_stats(void) +static void testapp_stats(struct test_spec *test) { - for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) { - stat_test_type = i; + int i; - /* reset defaults */ - rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; - frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + for (i = 0; i < STAT_TEST_TYPE_MAX; i++) { + test_spec_reset(test); + stat_test_type = i; + /* No or few packets will be received so cannot pace packets */ + test->ifobj_tx->pacing_on = false; switch (stat_test_type) { case STAT_TEST_RX_DROPPED: - frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE - - XDP_PACKET_HEADROOM - 1; + test_spec_set_name(test, "STAT_RX_DROPPED"); + test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - + XDP_PACKET_HEADROOM - 1; + testapp_validate_traffic(test); break; case STAT_TEST_RX_FULL: - rxqsize = RX_FULL_RXQSIZE; + test_spec_set_name(test, "STAT_RX_FULL"); + test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE; + testapp_validate_traffic(test); break; case STAT_TEST_TX_INVALID: - continue; + test_spec_set_name(test, "STAT_TX_INVALID"); + pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE); + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); + break; + case STAT_TEST_RX_FILL_EMPTY: + test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0, + MIN_PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + test->ifobj_rx->pkt_stream->use_addr_for_fill = true; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); + break; default: break; } - testapp_validate(); } - print_ksft_result(); + /* To only see the whole stat set being completed unless an individual test fails. */ + test_spec_set_name(test, "STATS"); +} + +/* Simple test */ +static bool hugepages_present(struct ifobject *ifobject) +{ + const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, -1, 0); + if (bufs == MAP_FAILED) + return false; + + munmap(bufs, mmap_sz); + return true; +} + +static bool testapp_unaligned(struct test_spec *test) +{ + if (!hugepages_present(test->ifobj_tx)) { + ksft_test_result_skip("No 2M huge pages present.\n"); + return false; + } + + test_spec_set_name(test, "UNALIGNED_MODE"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* Let half of the packets straddle a buffer boundrary */ + pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); + test->ifobj_rx->pkt_stream->use_addr_for_fill = true; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); + return true; +} + +static void testapp_single_pkt(struct test_spec *test) +{ + struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; + + pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); + testapp_validate_traffic(test); + pkt_stream_restore_default(test); } -static void init_iface(struct ifobject *ifobj, const char *dst_mac, - const char *src_mac, const char *dst_ip, - const char *src_ip, const u16 dst_port, - const u16 src_port, enum fvector vector) +static void testapp_invalid_desc(struct test_spec *test) +{ + struct pkt pkts[] = { + /* Zero packet length at address zero allowed */ + {0, 0, 0, true}, + /* Zero packet length allowed */ + {0x1000, 0, 0, true}, + /* Straddling the start of umem */ + {-2, PKT_SIZE, 0, false}, + /* Packet too large */ + {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + /* After umem ends */ + {UMEM_SIZE, PKT_SIZE, 0, false}, + /* Straddle the end of umem */ + {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false}, + /* Straddle a page boundrary */ + {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, + /* Straddle a 2K boundrary */ + {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, + /* Valid packet for synch so that something is received */ + {0x4000, PKT_SIZE, 0, true}}; + + if (test->ifobj_tx->umem->unaligned_mode) { + /* Crossing a page boundrary allowed */ + pkts[6].valid = true; + } + if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundrary not allowed */ + pkts[7].valid = false; + } + + pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); + testapp_validate_traffic(test); + pkt_stream_restore_default(test); +} + +static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, + const char *dst_ip, const char *src_ip, const u16 dst_port, + const u16 src_port, thread_func_t func_ptr) { struct in_addr ip; @@ -1009,58 +1309,80 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, ifobj->dst_port = dst_port; ifobj->src_port = src_port; - if (vector == tx) { - ifobj->fv.vector = tx; - ifobj->func_ptr = worker_testapp_validate_tx; - ifdict_tx = ifobj; - } else { - ifobj->fv.vector = rx; - ifobj->func_ptr = worker_testapp_validate_rx; - ifdict_rx = ifobj; - } + ifobj->func_ptr = func_ptr; } -static void run_pkt_test(int mode, int type) +static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) { test_type = type; /* reset defaults after potential previous test */ - xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - second_step = 0; stat_test_type = -1; - rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; - frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; - - configured_mode = mode; - - switch (mode) { - case (TEST_MODE_SKB): - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case (TEST_MODE_DRV): - xdp_flags |= XDP_FLAGS_DRV_MODE; - break; - default: - break; - } switch (test_type) { case TEST_TYPE_STATS: - testapp_stats(); + testapp_stats(test); break; case TEST_TYPE_TEARDOWN: - testapp_teardown(); + testapp_teardown(test); break; case TEST_TYPE_BIDI: - testapp_bidi(); + testapp_bidi(test); break; case TEST_TYPE_BPF_RES: - testapp_bpf_res(); + testapp_bpf_res(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION: + test_spec_set_name(test, "RUN_TO_COMPLETION"); + testapp_validate_traffic(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: + test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); + testapp_single_pkt(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: + test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); + break; + case TEST_TYPE_POLL: + test->ifobj_tx->use_poll = true; + test->ifobj_rx->use_poll = true; + test_spec_set_name(test, "POLL"); + testapp_validate_traffic(test); + break; + case TEST_TYPE_ALIGNED_INV_DESC: + test_spec_set_name(test, "ALIGNED_INV_DESC"); + testapp_invalid_desc(test); + break; + case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: + test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + testapp_invalid_desc(test); + break; + case TEST_TYPE_UNALIGNED_INV_DESC: + test_spec_set_name(test, "UNALIGNED_INV_DESC"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + testapp_invalid_desc(test); + break; + case TEST_TYPE_UNALIGNED: + if (!testapp_unaligned(test)) + return; + break; + case TEST_TYPE_HEADROOM: + testapp_headroom(test); break; default: - testapp_validate(); break; } + + print_ksft_result(test); } static struct ifobject *ifobject_create(void) @@ -1071,11 +1393,11 @@ static struct ifobject *ifobject_create(void) if (!ifobj) return NULL; - ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *)); + ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr)); if (!ifobj->xsk_arr) goto out_xsk_arr; - ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *)); + ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr)); if (!ifobj->umem_arr) goto out_umem_arr; @@ -1098,34 +1420,53 @@ static void ifobject_delete(struct ifobject *ifobj) int main(int argc, char **argv) { struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; - int i, j; + struct pkt_stream *pkt_stream_default; + struct ifobject *ifobj_tx, *ifobj_rx; + struct test_spec test; + u32 i, j; if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) exit_with_error(errno); - for (i = 0; i < MAX_INTERFACES; i++) { - ifdict[i] = ifobject_create(); - if (!ifdict[i]) - exit_with_error(ENOMEM); - } + ifobj_tx = ifobject_create(); + if (!ifobj_tx) + exit_with_error(ENOMEM); + ifobj_rx = ifobject_create(); + if (!ifobj_rx) + exit_with_error(ENOMEM); setlocale(LC_ALL, ""); - parse_command_line(argc, argv); + parse_command_line(ifobj_tx, ifobj_rx, argc, argv); - init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx); - init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx); + if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) { + usage(basename(argv[0])); + ksft_exit_xfail(); + } + + init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, + worker_testapp_validate_tx); + init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, + worker_testapp_validate_rx); + + test_spec_init(&test, ifobj_tx, ifobj_rx, 0); + pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); + if (!pkt_stream_default) + exit_with_error(ENOMEM); + test.pkt_stream_default = pkt_stream_default; ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); for (i = 0; i < TEST_MODE_MAX; i++) for (j = 0; j < TEST_TYPE_MAX; j++) { - run_pkt_test(i, j); + test_spec_init(&test, ifobj_tx, ifobj_rx, i); + run_pkt_test(&test, i, j); usleep(USLEEP_MAX); } - for (i = 0; i < MAX_INTERFACES; i++) - ifobject_delete(ifdict[i]); + pkt_stream_delete(pkt_stream_default); + ifobject_delete(ifobj_tx); + ifobject_delete(ifobj_rx); ksft_exit_pass(); return 0; diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 7e49b9fbe25e..2f705f44b748 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -20,10 +20,9 @@ #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 -#define MAX_SOCKS 1 +#define MAX_SOCKETS 2 +#define MAX_TEST_NAME_SIZE 32 #define MAX_TEARDOWN_ITER 10 -#define MAX_BIDI_ITER 2 -#define MAX_BPF_ITER 2 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -36,10 +35,13 @@ #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 -#define BATCH_SIZE 8 +#define BATCH_SIZE 64 #define POLL_TMOUT 1000 #define DEFAULT_PKT_CNT (4 * 1024) +#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) +#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) #define RX_FULL_RXQSIZE 32 +#define UMEM_HEADROOM_TEST_SIZE 128 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -51,8 +53,15 @@ enum test_mode { }; enum test_type { - TEST_TYPE_NOPOLL, + TEST_TYPE_RUN_TO_COMPLETION, + TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, + TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, TEST_TYPE_POLL, + TEST_TYPE_UNALIGNED, + TEST_TYPE_ALIGNED_INV_DESC, + TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, + TEST_TYPE_UNALIGNED_INV_DESC, + TEST_TYPE_HEADROOM, TEST_TYPE_TEARDOWN, TEST_TYPE_BIDI, TEST_TYPE_STATS, @@ -68,25 +77,21 @@ enum stat_test_type { STAT_TEST_TYPE_MAX }; -static int configured_mode; static bool opt_pkt_dump; -static u32 num_frames = DEFAULT_PKT_CNT / 4; -static bool second_step; static int test_type; static bool opt_verbose; - -static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; static int stat_test_type; -static u32 rxqsize; -static u32 frame_headroom; struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; struct xsk_umem *umem; + u32 num_frames; + u32 frame_headroom; void *buffer; + u32 frame_size; + bool unaligned_mode; }; struct xsk_socket_info { @@ -95,51 +100,63 @@ struct xsk_socket_info { struct xsk_umem_info *umem; struct xsk_socket *xsk; u32 outstanding_tx; -}; - -struct flow_vector { - enum fvector { - tx, - rx, - } vector; + u32 rxqsize; }; struct pkt { u64 addr; u32 len; u32 payload; + bool valid; }; struct pkt_stream { u32 nb_pkts; + u32 rx_pkt_nb; struct pkt *pkts; + bool use_addr_for_fill; }; +typedef void *(*thread_func_t)(void *arg); + struct ifobject { char ifname[MAX_INTERFACE_NAME_CHARS]; char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; struct xsk_socket_info *xsk; - struct xsk_socket_info **xsk_arr; - struct xsk_umem_info **umem_arr; + struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; - void *(*func_ptr)(void *arg); - struct flow_vector fv; + struct xsk_umem_info *umem_arr; + thread_func_t func_ptr; struct pkt_stream *pkt_stream; int ns_fd; u32 dst_ip; u32 src_ip; + u32 xdp_flags; + u32 bind_flags; u16 src_port; u16 dst_port; + bool tx_on; + bool rx_on; + bool use_poll; + bool pacing_on; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; -static struct ifobject *ifdict[MAX_INTERFACES]; -static struct ifobject *ifdict_rx; -static struct ifobject *ifdict_tx; +struct test_spec { + struct ifobject *ifobj_tx; + struct ifobject *ifobj_rx; + struct pkt_stream *pkt_stream_default; + u16 total_steps; + u16 current_step; + u16 nb_sockets; + char name[MAX_TEST_NAME_SIZE]; +}; -/*threads*/ pthread_barrier_t barr; -pthread_t t0, t1; +pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; + +u32 pkts_in_flight; #endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 73eb29c916d1..aa7d13d91963 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -54,7 +54,7 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd, #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif -TEST(close_range) +TEST(core_close_range) { int i, ret; int open_fds[101]; diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index bfabb19dc0d3..196b6640bf37 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -57,6 +57,19 @@ test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" echo "$orig_content" > "$file" +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +echo "$orig_content" > "$file" + # Test target_ids file # ==================== diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index 4de902ea14d8..de1c4e6de0b2 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <errno.h> -#include <linux/fcntl.h> +#include <fcntl.h> #include <malloc.h> #include <sys/ioctl.h> diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh new file mode 100755 index 000000000000..dca8be6092b9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Bridge FDB entries can be offloaded to DSA switches without holding the +# rtnl_mutex. Traditionally this mutex has conferred drivers implicit +# serialization, which means their code paths are not well tested in the +# presence of concurrency. +# This test creates a background task that stresses the FDB by adding and +# deleting an entry many times in a row without the rtnl_mutex held. +# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump +# (with rtnl_mutex held) from a foreground task. +# Since either the FDB dump or the additions/removals can fail, but the +# additions and removals are performed in deferred as opposed to process +# context, we cannot simply check for user space error codes. + +WAIT_TIME=1 +NUM_NETIFS=1 +REQUIRE_JQ="no" +REQUIRE_MZ="no" +NETIF_CREATE="no" +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh + +cleanup() { + echo "Cleaning up" + kill $pid && wait $pid &> /dev/null + ip link del br0 + echo "Please check kernel log for errors" +} +trap 'cleanup' EXIT + +eth=${NETIFS[p1]} + +ip link del br0 2&>1 >/dev/null || : +ip link add br0 type bridge && ip link set $eth master br0 + +(while :; do + bridge fdb add 00:01:02:03:04:05 dev $eth master static + bridge fdb del 00:01:02:03:04:05 dev $eth master static +done) & +pid=$! + +for i in $(seq 1 50); do + bridge fdb show > /dev/null + sleep 3 + echo "$((${i} * 2))% complete..." +done diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh index a37273473c1b..d3a891d421ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -87,6 +87,7 @@ ALL_TESTS=" NUM_NETIFS=4 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh h1_create() { @@ -626,8 +627,7 @@ ipv6_redirect_test() ptp_event_test() { - # PTP is only supported on Spectrum-1, for now. - [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 1 || return # PTP Sync (0) devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \ @@ -638,8 +638,7 @@ ptp_event_test() ptp_general_test() { - # PTP is only supported on Spectrum-1, for now. - [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 1 || return # PTP Announce (b) devlink_trap_stats_test "PTP General Message" "ptp_general" \ diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 508a702f0021..0bd5ffc218ac 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -272,13 +272,17 @@ __rate_test() rate_test() { - local id + local last_policer=$(devlink -j -p trap policer show | + jq '[.[]["'$DEVLINK_DEV'"][].policer] | max') - for id in $(devlink_trap_policer_ids_get); do - echo - log_info "Running rate test for policer $id" - __rate_test $id - done + log_info "Running rate test for policer 1" + __rate_test 1 + + log_info "Running rate test for policer $((last_policer / 2))" + __rate_test $((last_policer / 2)) + + log_info "Running rate test for policer $last_policer" + __rate_test $last_policer } __burst_test() @@ -342,13 +346,17 @@ __burst_test() burst_test() { - local id + local last_policer=$(devlink -j -p trap policer show | + jq '[.[]["'$DEVLINK_DEV'"][].policer] | max') + + log_info "Running burst test for policer 1" + __burst_test 1 + + log_info "Running burst test for policer $((last_policer / 2))" + __burst_test $((last_policer / 2)) - for id in $(devlink_trap_policer_ids_get); do - echo - log_info "Running burst size test for policer $id" - __burst_test $id - done + log_info "Running burst test for policer $last_policer" + __burst_test $last_policer } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index 8817851da7a9..e9a82cae8c9a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -13,7 +13,7 @@ # | # +-------------------|-----+ # | SW1 | | -# | $swp1 + | +# | $swp1 + | # | 192.0.2.2/28 | # | | # | + g1a (gre) | @@ -27,8 +27,8 @@ # | # +--|----------------------+ # | | VRF2 | -# | + $rp2 | -# | 198.51.100.2/28 | +# | + $rp2 | +# | 198.51.100.2/28 | # +-------------------------+ lib_dir=$(dirname $0)/../../../net/forwarding @@ -116,12 +116,16 @@ cleanup() forwarding_restore } -ecn_payload_get() +ipip_payload_get() { + local flags=$1; shift + local key=$1; shift + p=$(: - )"0"$( : GRE flags + )"$flags"$( : GRE flags )"0:00:"$( : Reserved + version )"08:00:"$( : ETH protocol type + )"$key"$( : Key )"4"$( : IP version )"5:"$( : IHL )"00:"$( : IP TOS @@ -137,6 +141,11 @@ ecn_payload_get() echo $p } +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + ecn_decap_test() { local trap_name="decap_error" @@ -171,31 +180,6 @@ ecn_decap_test() tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } -ipip_payload_get() -{ - local flags=$1; shift - local key=$1; shift - - p=$(: - )"$flags"$( : GRE flags - )"0:00:"$( : Reserved + version - )"08:00:"$( : ETH protocol type - )"$key"$( : Key - )"4"$( : IP version - )"5:"$( : IHL - )"00:"$( : IP TOS - )"00:14:"$( : IP total length - )"00:00:"$( : IP identification - )"20:00:"$( : IP flags + frag off - )"30:"$( : IP TTL - )"01:"$( : IP proto - )"E7:E6:"$( : IP header csum - )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 - )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 - ) - echo $p -} - no_matching_tunnel_test() { local trap_name="decap_error" @@ -239,7 +223,8 @@ decap_error_test() no_matching_tunnel_test "Decap error: Source IP check failed" \ 192.0.2.68 "0" no_matching_tunnel_test \ - "Decap error: Key exists but was not expected" $sip "2" ":E9:" + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" # Destroy the tunnel and create new one with key __addr_add_del g1 del 192.0.2.65/32 @@ -251,7 +236,8 @@ decap_error_test() no_matching_tunnel_test \ "Decap error: Key does not exist but was expected" $sip "0" no_matching_tunnel_test \ - "Decap error: Packet has a wrong key field" $sip "2" "E8:" + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh index cbe50f260a40..a95856aafd2a 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh @@ -11,3 +11,53 @@ if [[ ! -v MLXSW_CHIP ]]; then exit 1 fi fi + +MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in + mlxsw_spectrum) + echo 1 ;; + mlxsw_spectrum*) + echo ${MLXSW_CHIP#mlxsw_spectrum} ;; + *) + echo "Couldn't determine Spectrum chip revision." \ + > /dev/stderr ;; + esac) + +mlxsw_on_spectrum() +{ + local rev=$1; shift + local op="==" + local rev2=${rev%+} + + if [[ $rev2 != $rev ]]; then + op=">=" + fi + + ((MLXSW_SPECTRUM_REV $op rev2)) +} + +__mlxsw_only_on_spectrum() +{ + local rev=$1; shift + local caller=$1; shift + local src=$1; shift + + if ! mlxsw_on_spectrum "$rev"; then + log_test_skip $src:$caller "(Spectrum-$rev only)" + return 1 + fi +} + +mlxsw_only_on_spectrum() +{ + local caller=${FUNCNAME[1]} + local src=${BASH_SOURCE[1]} + local rev + + for rev in "$@"; do + if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then + return 0 + fi + done + + return 1 +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh new file mode 100644 index 000000000000..71e7681f15f6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to +# the maximum number of RIF MAC profiles, sets each of them with a random +# MAC address, and checks that eventually the number of occupied RIF MAC +# profiles equals the maximum number of RIF MAC profiles. + + +RIF_MAC_PROFILE_NUM_NETIFS=2 + +rif_mac_profiles_create() +{ + local count=$1; shift + local should_fail=$1; shift + local batch_file="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan=$(( i*10 )) + m=$(( i*11 )) + + cat >> $batch_file <<-EOF + link add link $h1 name $h1.$vlan \ + address 00:$m:$m:$m:$m:$m type vlan id $vlan + address add 192.0.$m.1/24 dev $h1.$vlan + EOF + done + + ip -b $batch_file &> /dev/null + check_err_fail $should_fail $? "RIF creation" + + rm -f $batch_file +} + +rif_mac_profile_test() +{ + local count=$1; shift + local should_fail=$1; shift + + rif_mac_profiles_create $count $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]') + + [[ $occ -eq $count ]] + check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)" +} + +rif_mac_profile_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses + # being generated and RIFs being created. + sysctl_set net.ipv6.conf.$h1.disable_ipv6 1 + sysctl_set net.ipv6.conf.$h2.disable_ipv6 1 + + ip link set $h1 up + ip link set $h2 up +} + +rif_mac_profile_cleanup() +{ + pre_cleanup + + ip link set $h2 down + ip link set $h1 down + + sysctl_restore net.ipv6.conf.$h2.disable_ipv6 + sysctl_restore net.ipv6.conf.$h1.disable_ipv6 +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh new file mode 100755 index 000000000000..c18340cee55d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh @@ -0,0 +1,213 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + mac_profile_test +" +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2 + + tc qdisc add dev $h1 ingress +} + +h1_destroy() +{ + tc qdisc del dev $h1 ingress + + ip route del 198.51.100.0/24 vrf v$h1 + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 + ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2 + + tc qdisc add dev $h2 ingress +} + +h2_destroy() +{ + tc qdisc del dev $h2 ingress + + ip route del 192.0.2.0/24 vrf v$h2 + simple_if_fini $h2 198.51.100.1/24 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp1 clsact + tc qdisc add dev $rp2 clsact + ip address add 192.0.2.2/24 dev $rp1 + ip address add 198.51.100.2/24 dev $rp2 +} + +router_destroy() +{ + ip address del 198.51.100.2/24 dev $rp2 + ip address del 192.0.2.2/24 dev $rp1 + tc qdisc del dev $rp2 clsact + tc qdisc del dev $rp1 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +h1_to_h2() +{ + local test_name=$@; shift + local smac=$(mac_get $rp2) + + RET=0 + + # Replace neighbour to avoid first packet being forwarded in software + ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2) + + # Add a filter to ensure that packets are forwarded in hardware. Cannot + # match on source MAC because it is not set in eACL after routing + tc filter add dev $rp2 egress proto ip pref 1 handle 101 \ + flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \ + action pass + + # Add a filter to ensure that packets are received with the correct + # source MAC + tc filter add dev $h2 ingress proto ip pref 1 handle 101 \ + flower skip_sw src_mac $smac ip_proto udp src_port 12345 \ + dst_port 54321 action pass + + $MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \ + -A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q + + tc_check_packets "dev $rp2 egress" 101 10 + check_err $? "packets not forwarded in hardware" + + tc_check_packets "dev $h2 ingress" 101 10 + check_err $? "packets not forwarded with correct source mac" + + log_test "h1->h2: $test_name" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower + ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2) +} + +h2_to_h1() +{ + local test_name=$@; shift + local rp1_mac=$(mac_get $rp1) + + RET=0 + + ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1) + + tc filter add dev $rp1 egress proto ip pref 1 handle 101 \ + flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \ + action pass + + tc filter add dev $h1 ingress proto ip pref 1 handle 101 \ + flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \ + dst_port 12345 action pass + + $MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \ + -A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q + + tc_check_packets "dev $rp1 egress" 101 10 + check_err $? "packets not forwarded in hardware" + + tc_check_packets "dev $h1 ingress" 101 10 + check_err $? "packets not forwarded with correct source mac" + + log_test "h2->h1: $test_name" + + tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower + ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1) +} + +smac_test() +{ + local test_name=$@; shift + + # Test that packets forwarded to $h2 via $rp2 are forwarded with the + # current source MAC of $rp2 + h1_to_h2 $test_name + + # Test that packets forwarded to $h1 via $rp1 are forwarded with the + # current source MAC of $rp1. This MAC is never changed during the test, + # but given the shared nature of MAC profile, the point is to see that + # changes to the MAC of $rp2 do not affect that of $rp1 + h2_to_h1 $test_name +} + +mac_profile_test() +{ + local rp2_mac=$(mac_get $rp2) + + # Test behavior when the RIF backing $rp2 is transitioned to use + # a new MAC profile + ip link set dev $rp2 addr 00:11:22:33:44:55 + smac_test "new mac profile" + + # Test behavior when the MAC profile used by the RIF is edited + ip link set dev $rp2 address 00:22:22:22:22:22 + smac_test "edit mac profile" + + # Restore original MAC + ip link set dev $rp2 addr $rp2_mac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +mac_profiles=$(devlink_resource_size_get rif_mac_profiles) +if [[ $mac_profiles -ne 1 ]]; then + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh new file mode 100755 index 000000000000..b513f64d9092 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_mac_profile_edit_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses + # being generated and RIFs being created + sysctl_set net.ipv6.conf.$h1.disable_ipv6 1 + sysctl_set net.ipv6.conf.$h2.disable_ipv6 1 + + ip link set $h1 up + ip link set $h2 up +} + +cleanup() +{ + pre_cleanup + + ip link set $h2 down + ip link set $h1 down + + sysctl_restore net.ipv6.conf.$h2.disable_ipv6 + sysctl_restore net.ipv6.conf.$h1.disable_ipv6 + + # Reload in order to clean all the RIFs and RIF MAC profiles created + devlink_reload +} + +create_max_rif_mac_profiles() +{ + local count=$1; shift + local batch_file="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan=$(( i*10 )) + m=$(( i*11 )) + + cat >> $batch_file <<-EOF + link add link $h1 name $h1.$vlan \ + address 00:$m:$m:$m:$m:$m type vlan id $vlan + address add 192.0.$m.1/24 dev $h1.$vlan + EOF + done + + ip -b $batch_file &> /dev/null + rm -f $batch_file +} + +rif_mac_profile_replacement_test() +{ + local h1_10_mac=$(mac_get $h1.10) + + RET=0 + + ip link set $h1.10 address 00:12:34:56:78:99 + check_err $? + + log_test "RIF MAC profile replacement" + + ip link set $h1.10 address $h1_10_mac +} + +rif_mac_profile_shared_replacement_test() +{ + local count=$1; shift + local i=$((count + 1)) + local vlan=$(( i*10 )) + local m=11 + + RET=0 + + # Create a VLAN netdevice that has the same MAC as the first one. + ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \ + type vlan id $vlan + ip address add 192.0.$m.1/24 dev $h1.$vlan + + # MAC replacement should fail because all the MAC profiles are in use + # and the profile is shared between multiple RIFs + m=$(( i*11 )) + ip link set $h1.$vlan address 00:$m:$m:$m:$m:$m &> /dev/null + check_fail $? + + log_test "RIF MAC profile shared replacement" + + ip link del dev $h1.$vlan +} + +rif_mac_profile_edit_test() +{ + local count=$(devlink_resource_size_get rif_mac_profiles) + + create_max_rif_mac_profiles $count + + rif_mac_profile_replacement_test + rif_mac_profile_shared_replacement_test $count +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index a217f9f6775b..04f03ae9d8fb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -10,9 +10,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" - rif_set_addr_test rif_vrf_set_addr_test - rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test vlan_interface_deletion_test bridge_deletion_test @@ -60,55 +58,6 @@ cleanup() ip link set dev $swp1 down } -rif_set_addr_test() -{ - local swp1_mac=$(mac_get $swp1) - local swp2_mac=$(mac_get $swp2) - - RET=0 - - # $swp1 and $swp2 likely got their IPv6 local addresses already, but - # here we need to test the transition to RIF. - ip addr flush dev $swp1 - ip addr flush dev $swp2 - sleep .1 - - ip addr add dev $swp1 192.0.2.1/28 - check_err $? - - ip link set dev $swp1 addr 00:11:22:33:44:55 - check_err $? - - # IP address enablement should be rejected if the MAC address prefix - # doesn't match other RIFs. - ip addr add dev $swp2 192.0.2.2/28 &>/dev/null - check_fail $? "IP address addition passed for a device with a wrong MAC" - ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? "no extack for IP address addition" - - ip link set dev $swp2 addr 00:11:22:33:44:66 - check_err $? - ip addr add dev $swp2 192.0.2.2/28 &>/dev/null - check_err $? - - # Change of MAC address of a RIF should be forbidden if the new MAC - # doesn't share the prefix with other MAC addresses. - ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null - check_fail $? "change of MAC address passed for a wrong MAC" - ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? "no extack for MAC address change" - - log_test "RIF - bad MAC change" - - ip addr del dev $swp2 192.0.2.2/28 - ip addr del dev $swp1 192.0.2.1/28 - - ip link set dev $swp2 addr $swp2_mac - ip link set dev $swp1 addr $swp1_mac -} - rif_vrf_set_addr_test() { # Test that it is possible to set an IP address on a VRF upper despite @@ -128,45 +77,6 @@ rif_vrf_set_addr_test() ip link del dev vrf-test } -rif_inherit_bridge_addr_test() -{ - RET=0 - - # Create first RIF - ip addr add dev $swp1 192.0.2.1/28 - check_err $? - - # Create a FID RIF - ip link add name br1 up type bridge vlan_filtering 0 - ip link set dev $swp2 master br1 - ip addr add dev br1 192.0.2.17/28 - check_err $? - - # Prepare a device with a low MAC address - ip link add name d up type dummy - ip link set dev d addr 00:11:22:33:44:55 - - # Attach the device to br1. That prompts bridge address change, which - # should be vetoed, thus preventing the attachment. - ip link set dev d master br1 &>/dev/null - check_fail $? "Device with low MAC was permitted to attach a bridge with RIF" - ip link set dev d master br1 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? "no extack for bridge attach rejection" - - ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null - check_fail $? "Changing swp2's MAC address permitted" - ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? "no extack for bridge port MAC address change rejection" - - log_test "RIF - attach port with bad MAC to bridge" - - ip link del dev d - ip link del dev br1 - ip addr del dev $swp1 192.0.2.1/28 -} - rif_non_inherit_bridge_addr_test() { local swp2_mac=$(mac_get $swp2) @@ -779,7 +689,7 @@ nexthop_obj_offload_test() setup_wait ip nexthop add id 1 via 192.0.2.2 dev $swp1 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ @@ -791,7 +701,7 @@ nexthop_obj_offload_test() ip nexthop show id 1 check_err $? "nexthop marked as offloaded after setting neigh to failed state" - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ ip nexthop show id 1 @@ -828,11 +738,11 @@ nexthop_obj_group_offload_test() ip nexthop add id 1 via 192.0.2.2 dev $swp1 ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 ip nexthop add id 10 group 1/2 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 - ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 - ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ @@ -888,11 +798,11 @@ nexthop_obj_bucket_offload_test() ip nexthop add id 1 via 192.0.2.2 dev $swp1 ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 - ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 - ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ @@ -921,7 +831,7 @@ nexthop_obj_bucket_offload_test() check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop" # Revalidate nexthop id 2 by changing its neighbour - ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 busywait "$TIMEOUT" wait_for_offload \ ip nexthop bucket show nhid 2 @@ -971,9 +881,9 @@ nexthop_obj_route_offload_test() setup_wait ip nexthop add id 1 via 192.0.2.2 dev $swp1 - ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 - ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ dev $swp1 ip route replace 198.51.100.0/24 nhid 1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh new file mode 100755 index 000000000000..071a33d10c20 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh @@ -0,0 +1,290 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test qdisc offload indication + + +ALL_TESTS=" + test_root + test_port_tbf + test_etsprio + test_etsprio_port_tbf +" +NUM_NETIFS=1 +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh + +check_not_offloaded() +{ + local handle=$1; shift + local h + local offloaded + + h=$(qdisc_stats_get $h1 "$handle" .handle) + [[ $h == '"'$handle'"' ]] + check_err $? "Qdisc with handle $handle does not exist" + + offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded) + [[ $offloaded == true ]] + check_fail $? "Qdisc with handle $handle offloaded, but should not be" +} + +check_all_offloaded() +{ + local handle=$1; shift + + if [[ ! -z $handle ]]; then + local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded) + [[ $offloaded == true ]] + check_err $? "Qdisc with handle $handle not offloaded" + fi + + local unoffloaded=$(tc q sh dev $h1 invisible | + grep -v offloaded | + sed s/root/parent\ root/ | + cut -d' ' -f 5) + [[ -z $unoffloaded ]] + check_err $? "Qdiscs with following parents not offloaded: $unoffloaded" + + pre_cleanup +} + +with_ets() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + ets bands 8 priomap 7 6 5 4 3 2 1 0 + "$@" + tc qdisc del dev $h1 $locus +} + +with_prio() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 + "$@" + tc qdisc del dev $h1 $locus +} + +with_red() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500 + "$@" + tc qdisc del dev $h1 $locus +} + +with_tbf() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + tbf rate 400Mbit burst 128K limit 1M + "$@" + tc qdisc del dev $h1 $locus +} + +with_pfifo() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K + "$@" + tc qdisc del dev $h1 $locus +} + +with_bfifo() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K + "$@" + tc qdisc del dev $h1 $locus +} + +with_drr() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle drr + "$@" + tc qdisc del dev $h1 $locus +} + +with_qdiscs() +{ + local handle=$1; shift + local parent=$1; shift + local kind=$1; shift + local next_handle=$((handle * 2)) + local locus; + + if [[ $kind == "--" ]]; then + local cmd=$1; shift + $cmd $(printf %x: $parent) "$@" + else + if ((parent == 0)); then + locus=root + else + locus=$(printf "parent %x:1" $parent) + fi + + with_$kind $(printf %x: $handle) "$locus" \ + with_qdiscs $next_handle $handle "$@" + fi +} + +get_name() +{ + local parent=$1; shift + local name=$(echo "" "${@^^}" | tr ' ' -) + + if ((parent != 0)); then + kind=$(qdisc_stats_get $h1 $parent: .kind) + kind=${kind%\"} + kind=${kind#\"} + name="-${kind^^}$name" + fi + + echo root$name +} + +do_test_offloaded() +{ + local handle=$1; shift + local parent=$1; shift + + RET=0 + with_qdiscs $handle $parent "$@" -- check_all_offloaded + log_test $(get_name $parent "$@")" offloaded" +} + +do_test_nooffload() +{ + local handle=$1; shift + local parent=$1; shift + + local name=$(echo "${@^^}" | tr ' ' -) + local kind + + RET=0 + with_qdiscs $handle $parent "$@" -- check_not_offloaded + log_test $(get_name $parent "$@")" not offloaded" +} + +do_test_combinations() +{ + local handle=$1; shift + local parent=$1; shift + + local cont + local leaf + local fifo + + for cont in "" ets prio; do + for leaf in "" red tbf "red tbf" "tbf red"; do + for fifo in "" pfifo bfifo; do + if [[ -z "$cont$leaf$fifo" ]]; then + continue + fi + do_test_offloaded $handle $parent \ + $cont $leaf $fifo + done + done + done + + for cont in ets prio; do + for leaf in red tbf; do + do_test_nooffload $handle $parent $cont red tbf $leaf + do_test_nooffload $handle $parent $cont tbf red $leaf + done + for leaf in "red red" "tbf tbf"; do + do_test_nooffload $handle $parent $cont $leaf + done + done + + do_test_nooffload $handle $parent drr +} + +test_root() +{ + do_test_combinations 1 0 +} + +test_port_tbf() +{ + with_tbf 1: root \ + do_test_combinations 8 1 +} + +do_test_etsprio() +{ + local parent=$1; shift + local tbfpfx=$1; shift + local cont + + for cont in ets prio; do + RET=0 + with_$cont 8: "$parent" \ + with_red 11: "parent 8:1" \ + with_red 12: "parent 8:2" \ + with_tbf 13: "parent 8:3" \ + with_tbf 14: "parent 8:4" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED,TBF} offloaded" + + RET=0 + with_$cont 8: "$parent" \ + with_red 81: "parent 8:1" \ + with_tbf 811: "parent 81:1" \ + with_tbf 84: "parent 8:4" \ + with_red 841: "parent 84:1" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded" + + RET=0 + with_$cont 8: "$parent" \ + with_red 81: "parent 8:1" \ + with_tbf 811: "parent 81:1" \ + with_bfifo 8111: "parent 811:1" \ + with_tbf 82: "parent 8:2" \ + with_red 821: "parent 82:1" \ + with_bfifo 8211: "parent 821:1" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded" + done +} + +test_etsprio() +{ + do_test_etsprio root "" +} + +test_etsprio_port_tbf() +{ + with_tbf 1: root \ + do_test_etsprio "parent 1:1" "-TBF" +} + +cleanup() +{ + tc qdisc del dev $h1 root &>/dev/null +} + +trap cleanup EXIT +h1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 33ddd01689be..f260f01db0e8 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -73,6 +73,7 @@ CHECK_TC="yes" lib_dir=$(dirname $0)/../../../net/forwarding source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh source qos_lib.sh ipaddr() @@ -331,6 +332,14 @@ get_nmarked() ethtool_stats_get $swp3 ecn_marked } +get_qdisc_nmarked() +{ + local vlan=$1; shift + + busywait_for_counter 1100 +1 \ + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked +} + get_qdisc_npackets() { local vlan=$1; shift @@ -384,14 +393,15 @@ build_backlog() check_marking() { + local get_nmarked=$1; shift local vlan=$1; shift local cond=$1; shift local npackets_0=$(get_qdisc_npackets $vlan) - local nmarked_0=$(get_nmarked $vlan) + local nmarked_0=$($get_nmarked $vlan) sleep 5 local npackets_1=$(get_qdisc_npackets $vlan) - local nmarked_1=$(get_nmarked $vlan) + local nmarked_1=$($get_nmarked $vlan) local nmarked_d=$((nmarked_1 - nmarked_0)) local npackets_d=$((npackets_1 - npackets_0)) @@ -404,6 +414,7 @@ check_marking() ecn_test_common() { local name=$1; shift + local get_nmarked=$1; shift local vlan=$1; shift local limit=$1; shift local backlog @@ -416,7 +427,7 @@ ecn_test_common() RET=0 backlog=$(build_backlog $vlan $((2 * limit / 3)) udp) check_err $? "Could not build the requested backlog" - pct=$(check_marking $vlan "== 0") + pct=$(check_marking "$get_nmarked" $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "TC $((vlan - 10)): $name backlog < limit" @@ -426,22 +437,23 @@ ecn_test_common() RET=0 backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) check_err $? "Could not build the requested backlog" - pct=$(check_marking $vlan ">= 95") + pct=$(check_marking "$get_nmarked" $vlan ">= 95") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." log_test "TC $((vlan - 10)): $name backlog > limit" } -do_ecn_test() +__do_ecn_test() { + local get_nmarked=$1; shift local vlan=$1; shift local limit=$1; shift - local name=ECN + local name=${1-ECN}; shift start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 sleep 1 - ecn_test_common "$name" $vlan $limit + ecn_test_common "$name" "$get_nmarked" $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, it should all get dropped, and backlog @@ -455,6 +467,23 @@ do_ecn_test() sleep 1 } +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + + __do_ecn_test get_nmarked "$vlan" "$limit" +} + +do_ecn_test_perband() +{ + local vlan=$1; shift + local limit=$1; shift + + mlxsw_only_on_spectrum 3+ || return + __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" +} + do_ecn_nodrop_test() { local vlan=$1; shift @@ -465,7 +494,7 @@ do_ecn_nodrop_test() $h3_mac tos=0x01 sleep 1 - ecn_test_common "$name" $vlan $limit + ecn_test_common "$name" get_nmarked $vlan $limit # Up there we saw that UDP gets accepted when backlog is below the # limit. Now that it is above, in nodrop mode, make sure it goes to @@ -495,7 +524,7 @@ do_red_test() RET=0 backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01) check_err $? "Could not build the requested backlog" - pct=$(check_marking $vlan "== 0") + pct=$(check_marking get_nmarked $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "TC $((vlan - 10)): RED backlog < limit" @@ -503,7 +532,7 @@ do_red_test() RET=0 backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) check_fail $? "Traffic went into backlog instead of being early-dropped" - pct=$(check_marking $vlan "== 0") + pct=$(check_marking get_nmarked $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) @@ -544,6 +573,55 @@ do_mc_backlog_test() log_test "TC $((vlan - 10)): Qdisc reports MC backlog" } +do_mark_test() +{ + local vlan=$1; shift + local limit=$1; shift + local subtest=$1; shift + local fetch_counter=$1; shift + local should_fail=$1; shift + local base + + mlxsw_only_on_spectrum 2+ || return + + RET=0 + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + + # Create a bit of a backlog and observe no mirroring due to marks. + qevent_rule_install_$subtest + + build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null + + base=$($fetch_counter) + count=$(busywait 1100 until_counter_is ">= $((base + 1))" \ + $fetch_counter) + check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure" + + # Above limit, everything should be mirrored, we should see lots of + # packets. + build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null + busywait_for_counter 1100 +10000 \ + $fetch_counter > /dev/null + check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd" + + # When the rule is uninstalled, there should be no mirroring. + qevent_rule_uninstall_$subtest + busywait_for_counter 1100 +10 \ + $fetch_counter > /dev/null + check_fail $? "Spurious packets observed after uninstall" + + if ((should_fail)); then + log_test "TC $((vlan - 10)): marked packets not $subtest'd" + else + log_test "TC $((vlan - 10)): marked packets $subtest'd" + fi + + stop_traffic + sleep 1 +} + do_drop_test() { local vlan=$1; shift @@ -551,10 +629,10 @@ do_drop_test() local trigger=$1; shift local subtest=$1; shift local fetch_counter=$1; shift - local backlog local base local now - local pct + + mlxsw_only_on_spectrum 2+ || return RET=0 @@ -628,6 +706,22 @@ do_drop_mirror_test() tc filter del dev $h2 ingress pref 1 handle 101 flower } +do_mark_mirror_test() +{ + local vlan=$1; shift + local limit=$1; shift + + tc filter add dev $h2 ingress pref 1 handle 101 prot ip \ + flower skip_sw ip_proto tcp \ + action drop + + do_mark_test "$vlan" "$limit" mirror \ + qevent_counter_fetch_mirror \ + $(: should_fail=)0 + + tc filter del dev $h2 ingress pref 1 handle 101 flower +} + qevent_rule_install_trap() { tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ @@ -655,3 +749,14 @@ do_drop_trap_test() do_drop_test "$vlan" "$limit" "$trap_name" trap \ "qevent_counter_fetch_trap $trap_name" } + +qevent_rule_install_trap_fwd() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action trap_fwd hw_stats disabled +} + +qevent_rule_uninstall_trap_fwd() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index f3ef3274f9b3..1e5ad3209436 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -4,11 +4,13 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_test_perband ecn_nodrop_test red_test mc_backlog_test red_mirror_test red_trap_test + ecn_mirror_test " : ${QDISC:=ets} source sch_red_core.sh @@ -21,28 +23,60 @@ source sch_red_core.sh BACKLOG1=200000 BACKLOG2=500000 -install_qdisc() +install_root_qdisc() { - local -a args=("$@") - tc qdisc add dev $swp3 root handle 10: $QDISC \ bands 8 priomap 7 6 5 4 3 2 1 0 +} + +install_qdisc_tc0() +{ + local -a args=("$@") + tc qdisc add dev $swp3 parent 10:8 handle 108: red \ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ probability 1.0 avpkt 8000 burst 38 "${args[@]}" +} + +install_qdisc_tc1() +{ + local -a args=("$@") + tc qdisc add dev $swp3 parent 10:7 handle 107: red \ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ probability 1.0 avpkt 8000 burst 63 "${args[@]}" +} + +install_qdisc() +{ + install_root_qdisc + install_qdisc_tc0 "$@" + install_qdisc_tc1 "$@" sleep 1 } -uninstall_qdisc() +uninstall_qdisc_tc0() { - tc qdisc del dev $swp3 parent 10:7 tc qdisc del dev $swp3 parent 10:8 +} + +uninstall_qdisc_tc1() +{ + tc qdisc del dev $swp3 parent 10:7 +} + +uninstall_root_qdisc() +{ tc qdisc del dev $swp3 root } +uninstall_qdisc() +{ + uninstall_qdisc_tc0 + uninstall_qdisc_tc1 + uninstall_root_qdisc +} + ecn_test() { install_qdisc ecn @@ -53,6 +87,16 @@ ecn_test() uninstall_qdisc } +ecn_test_perband() +{ + install_qdisc ecn + + do_ecn_test_perband 10 $BACKLOG1 + do_ecn_test_perband 11 $BACKLOG2 + + uninstall_qdisc +} + ecn_nodrop_test() { install_qdisc ecn nodrop @@ -112,6 +156,16 @@ red_trap_test() uninstall_qdisc } +ecn_mirror_test() +{ + install_qdisc ecn qevent mark block 10 + + do_mark_mirror_test 10 $BACKLOG1 + do_mark_mirror_test 11 $BACKLOG2 + + uninstall_qdisc +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index ede9c38d3eff..d79a82f317d2 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -4,6 +4,7 @@ ALL_TESTS=" ping_ipv4 ecn_test + ecn_test_perband ecn_nodrop_test red_test mc_backlog_test @@ -35,6 +36,13 @@ ecn_test() uninstall_qdisc } +ecn_test_perband() +{ + install_qdisc ecn + do_ecn_test_perband 10 $BACKLOG + uninstall_qdisc +} + ecn_nodrop_test() { install_qdisc ecn nodrop diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh new file mode 100755 index 000000000000..f62ce479c266 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh @@ -0,0 +1,250 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 2001:db8:1::2/64 | +# | | +# | + g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 2001:db8:10::1/64 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:10::2/64 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:10::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:10::2/64 +} + +switch_create() +{ + ip link set dev $swp1 up + __addr_add_del $swp1 add 2001:db8:1::2/64 + tc qdisc add dev $swp1 clsact + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit + ip link set dev g1 up + __addr_add_del g1 add 2001:db8:3::1/128 + + ip link set dev $rp1 up + __addr_add_del $rp1 add 2001:db8:10::1/64 +} + +switch_destroy() +{ + __addr_add_del $rp1 del 2001:db8:10::1/64 + ip link set dev $rp1 down + + __addr_add_del g1 del 2001:db8:3::1/128 + ip link set dev g1 down + tunnel_destroy g1 + + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 2001:db8:1::2/64 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ipip_payload_get() +{ + local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"86:dd:"$( : ETH protocol type + )"$key"$( : Key + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:00:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + ) + echo $p +} + +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \ + action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \ + tos=$outer_tos,next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=2001:db8:3::2 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 2001:db8:4::2 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 2001:db8:3::1/128 + tunnel_destroy g1 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit key 233 + __addr_add_del g1 add 2001:db8:3::1/128 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 50654f8a8c37..e9f65bd2e299 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -7,12 +7,9 @@ NUM_NETIFS=6 source $lib_dir/lib.sh source $lib_dir/tc_common.sh source $lib_dir/devlink_lib.sh +source ../mlxsw_lib.sh -if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \ - "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then - echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3" - exit 1 -fi +mlxsw_only_on_spectrum 2+ || exit 1 current_test="" @@ -28,7 +25,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port" +ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh new file mode 100644 index 000000000000..303d7cbe3c45 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_mac_profile_scale.sh + +rif_mac_profile_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get rif_mac_profiles) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh index 73035e25085d..06a80f40daa4 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh @@ -2,11 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 source "../../../../net/forwarding/devlink_lib.sh" +source ../mlxsw_lib.sh -if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then - echo "SKIP: test is tailored for Mellanox Spectrum" - exit 1 -fi +mlxsw_only_on_spectrum 1 || exit 1 # Needed for returning to default declare -A KVD_DEFAULTS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 685dfb3478b3..bcb110e830ce 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port" +ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh new file mode 100644 index 000000000000..303d7cbe3c45 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_mac_profile_scale.sh + +rif_mac_profile_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get rif_mac_profiles) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 5ec3beb637c8..0441a18f098b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -20,6 +20,7 @@ NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh switch_create() { @@ -169,7 +170,7 @@ matchall_sample_egress_test() # It is forbidden in mlxsw driver to have matchall with sample action # bound on egress. Spectrum-1 specific restriction - [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 1 || return tc qdisc add dev $swp1 clsact diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 373d5f2a846e..83a0210e7544 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -51,6 +51,7 @@ NUM_NETIFS=8 CAPTURE_FILE=$(mktemp) source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh # Available at https://github.com/Mellanox/libpsample require_command psample @@ -431,7 +432,7 @@ tc_sample_md_out_tc_test() RET=0 # Output traffic class is not supported on Spectrum-1. - [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 2+ || return tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ skip_sw action sample rate 5 group 1 @@ -477,7 +478,7 @@ tc_sample_md_out_tc_occ_test() RET=0 # Output traffic class occupancy is not supported on Spectrum-1. - [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 2+ || return tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ skip_sw action sample rate 1024 group 1 @@ -521,7 +522,7 @@ tc_sample_md_latency_test() RET=0 # Egress sampling not supported on Spectrum-1. - [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 2+ || return tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \ skip_sw action sample rate 5 group 1 @@ -550,7 +551,7 @@ tc_sample_acl_group_conflict_test() # port with different groups. # Policy-based sampling is not supported on Spectrum-1. - [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 2+ || return tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ skip_sw action sample rate 1024 group 1 @@ -579,7 +580,7 @@ __tc_sample_acl_rate_test() RET=0 # Policy-based sampling is not supported on Spectrum-1. - [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 2+ || return tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1 @@ -631,7 +632,7 @@ tc_sample_acl_max_rate_test() RET=0 # Policy-based sampling is not supported on Spectrum-1. - [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + mlxsw_only_on_spectrum 2+ || return tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ skip_sw action sample rate $((2 ** 24 - 1)) group 1 diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index 7ca1f030d209..922744059aaa 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -50,7 +50,7 @@ function make_netdev { modprobe netdevsim fi - echo $NSIM_ID > /sys/bus/netdevsim/new_device + echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device # get new device name ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ } diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh new file mode 100755 index 000000000000..fd13c8cfb7a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +set -o pipefail + +n_children() { + n=$(tc qdisc show dev $NDEV | grep '^qdisc' | wc -l) + echo $((n - 1)) +} + +tcq() { + tc qdisc $1 dev $NDEV ${@:2} +} + +n_child_assert() { + n=$(n_children) + if [ $n -ne $1 ]; then + echo "ERROR ($root): ${@:2}, expected $1 have $n" + ((num_errors++)) + else + ((num_passes++)) + fi +} + + +for root in mq mqprio; do + NDEV=$(make_netdev 1 4) + + opts= + [ $root == "mqprio" ] && opts='hw 0 num_tc 1 map 0 0 0 0 queues 1@0' + + tcq add root handle 100: $root $opts + n_child_assert 4 'Init' + + # All defaults + + for n in 3 2 1 2 3 4 1 4; do + ethtool -L $NDEV combined $n + n_child_assert $n "Change queues to $n while down" + done + + ip link set dev $NDEV up + + for n in 3 2 1 2 3 4 1 4; do + ethtool -L $NDEV combined $n + n_child_assert $n "Change queues to $n while up" + done + + # One real one + tcq replace parent 100:4 handle 204: pfifo_fast + n_child_assert 4 "One real queue" + + ethtool -L $NDEV combined 1 + n_child_assert 2 "One real queue, one default" + + ethtool -L $NDEV combined 4 + n_child_assert 4 "One real queue, rest default" + + # Graft some + tcq replace parent 100:1 handle 204: + n_child_assert 3 "Grafted" + + ethtool -L $NDEV combined 1 + n_child_assert 1 "Grafted, one" + + cleanup_nsim +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index f7d84549cc3e..eaf8a04a7ca5 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -156,6 +156,11 @@ create_tcam_skeleton() setup_prepare() { + ip link set $eth0 up + ip link set $eth1 up + ip link set $eth2 up + ip link set $eth3 up + create_tcam_skeleton $eth0 ip link add br0 type bridge @@ -242,9 +247,9 @@ test_vlan_push() tcpdump_cleanup } -test_vlan_modify() +test_vlan_ingress_modify() { - printf "Testing VLAN modification.. " + printf "Testing ingress VLAN modification.. " ip link set br0 type bridge vlan_filtering 1 bridge vlan add dev $eth0 vid 200 @@ -280,6 +285,44 @@ test_vlan_modify() ip link set br0 type bridge vlan_filtering 0 } +test_vlan_egress_modify() +{ + printf "Testing egress VLAN modification.. " + + tc qdisc add dev $eth1 clsact + + ip link set br0 type bridge vlan_filtering 1 + bridge vlan add dev $eth0 vid 200 + bridge vlan add dev $eth1 vid 200 + + tc filter add dev $eth1 egress chain $(ES0) pref 3 \ + protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \ + action vlan modify id 300 priority 7 + + tcpdump_start $eth2 + + $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup + + tc filter del dev $eth1 egress chain $(ES0) pref 3 + tc qdisc del dev $eth1 clsact + + bridge vlan del dev $eth0 vid 200 + bridge vlan del dev $eth1 vid 200 + ip link set br0 type bridge vlan_filtering 0 +} + test_skbedit_priority() { local num_pkts=100 @@ -304,7 +347,8 @@ trap cleanup EXIT ALL_TESTS=" test_vlan_pop test_vlan_push - test_vlan_modify + test_vlan_ingress_modify + test_vlan_egress_modify test_skbedit_priority " diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 8ec1922e974e..c3311c8c4089 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -428,7 +428,7 @@ for t in $TEST_CASES; do exit 1 fi done -(cd $TRACING_DIR; initialize_ftrace) # for cleanup +(cd $TRACING_DIR; finish_ftrace) # for cleanup prlog "" prlog "# of passed: " `echo $PASSED_CASES | wc -w` diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc index 5f5b2ba3e557..60c02b482be8 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc @@ -11,8 +11,8 @@ SYSTEM="syscalls" EVENT="sys_enter_openat" FIELD="filename" EPROBE="eprobe_open" - -echo "e:$EPROBE $SYSTEM/$EVENT file=+0(\$filename):ustring" >> dynamic_events +OPTIONS="file=+0(\$filename):ustring" +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events grep -q "$EPROBE" dynamic_events test -d events/eprobes/$EPROBE @@ -37,4 +37,54 @@ echo "-:$EPROBE" >> dynamic_events ! grep -q "$EPROBE" dynamic_events ! test -d events/eprobes/$EPROBE +# test various ways to remove the probe (already tested with just event name) + +# With group name +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With group name and system/event +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE $SYSTEM/$EVENT" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With just event name and system/event +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:$EPROBE $SYSTEM/$EVENT" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With just event name and system/event and options +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With group name and system/event and options +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# Finally make sure what is in the dynamic_events file clears it too +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +LINE=`sed -e '/$EPROBE/s/^e/-/' < dynamic_events` +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 000fd05e84b1..5f6cbec847fc 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -124,10 +124,22 @@ initialize_ftrace() { # Reset ftrace to initial-state [ -f uprobe_events ] && echo > uprobe_events [ -f synthetic_events ] && echo > synthetic_events [ -f snapshot ] && echo 0 > snapshot + +# Stop tracing while reading the trace file by default, to prevent +# the test results while checking it and to avoid taking a long time +# to check the result. + [ -f options/pause-on-trace ] && echo 1 > options/pause-on-trace + clear_trace enable_tracing } +finish_ftrace() { + initialize_ftrace +# And recover it to default. + [ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace +} + check_requires() { # Check required files and tracers for i in "$@" ; do r=${i%:README} diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index 84285a6f60b0..dc7ade196798 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -22,6 +22,9 @@ ppc64*) ppc*) ARG1=%r3 ;; +s390*) + ARG1=%r2 +;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 474ca1a9a088..47d84b5cb6ca 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -32,6 +32,10 @@ ppc*) GOODREG=%r3 BADREG=%msr ;; +s390*) + GOODREG=%r2 + BADREG=%s2 +;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc new file mode 100644 index 000000000000..05ffba299dbf --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc @@ -0,0 +1,63 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test histogram expression parsing +# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README + + +fail() { #msg + echo $1 + exit_fail +} + +test_hist_expr() { # test_name expression expected_val + trigger="events/sched/sched_process_fork/trigger" + + reset_trigger_file $trigger + + echo "Test hist trigger expressions - $1" + + echo "hist:keys=common_pid:x=$2" > $trigger + + for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done + + actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'` + + if [ $actual != $3 ]; then + fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual" + fi + + reset_trigger_file $trigger +} + +check_error() { # test_name command-with-error-pos-by-^ + trigger="events/sched/sched_process_fork/trigger" + + echo "Test hist trigger expressions - $1" + ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger +} + +test_hist_expr "Variable assignment" "123" "123" + +test_hist_expr "Subtraction not associative" "16-8-4-2" "2" + +test_hist_expr "Division not associative" "64/8/4/2" "1" + +test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14" + +test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12" + +test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8" + +test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5" + +# err pos for "too many subexpressions" is dependent on where +# the last subexpression was detected. This can vary depending +# on how the expression tree was generated. +check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4' +check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5' + +check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2' + +check_error "Division by zero" 'hist:keys=common_pid:x=3/^0' + +exit 0 diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 0e78b49d0f2f..fbcbdb6963b3 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -8,3 +8,4 @@ futex_wait_uninitialized_heap futex_wait_wouldblock futex_wait futex_requeue +futex_waitv diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index bd1fec59e010..5cc38de9d8ea 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -17,7 +17,8 @@ TEST_GEN_FILES := \ futex_wait_uninitialized_heap \ futex_wait_private_mapped_file \ futex_wait \ - futex_requeue + futex_requeue \ + futex_waitv TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index 1f8f6daaf1e7..3651ce17beeb 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -17,6 +17,7 @@ #include <pthread.h> #include "futextest.h" +#include "futex2test.h" #include "logging.h" #define TEST_NAME "futex-wait-timeout" @@ -96,6 +97,12 @@ int main(int argc, char *argv[]) struct timespec to; pthread_t thread; int c; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1, + .flags = FUTEX_32, + .__reserved = 0 + }; while ((c = getopt(argc, argv, "cht:v:")) != -1) { switch (c) { @@ -118,7 +125,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(7); + ksft_set_plan(9); ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); @@ -175,6 +182,18 @@ int main(int argc, char *argv[]) res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME); test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS); + /* futex_waitv with CLOCK_MONOTONIC */ + if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) + return RET_FAIL; + res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); + test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT); + + /* futex_waitv with CLOCK_REALTIME */ + if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) + return RET_FAIL; + res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME); + test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT); + ksft_print_cnts(); return ret; } diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 0ae390ff8164..7d7a6a06cdb7 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -22,6 +22,7 @@ #include <string.h> #include <time.h> #include "futextest.h" +#include "futex2test.h" #include "logging.h" #define TEST_NAME "futex-wait-wouldblock" @@ -42,6 +43,12 @@ int main(int argc, char *argv[]) futex_t f1 = FUTEX_INITIALIZER; int res, ret = RET_PASS; int c; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1+1, + .flags = FUTEX_32, + .__reserved = 0 + }; while ((c = getopt(argc, argv, "cht:v:")) != -1) { switch (c) { @@ -61,18 +68,44 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2); ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", basename(argv[0])); info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != EWOULDBLOCK) { - fail("futex_wait returned: %d %s\n", - res ? errno : res, res ? strerror(errno) : ""); + ksft_test_result_fail("futex_wait returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wait\n"); } - print_result(TEST_NAME, ret); + if (clock_gettime(CLOCK_MONOTONIC, &to)) { + error("clock_gettime failed\n", errno); + return errno; + } + + to.tv_nsec += timeout_ns; + + if (to.tv_nsec >= 1000000000) { + to.tv_sec++; + to.tv_nsec -= 1000000000; + } + + info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); + if (!res || errno != EWOULDBLOCK) { + ksft_test_result_pass("futex_waitv returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv\n"); + } + + ksft_print_cnts(); return ret; } diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c new file mode 100644 index 000000000000..a94337f677e1 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_waitv.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * futex_waitv() test by André Almeida <andrealmeid@collabora.com> + * + * Copyright 2021 Collabora Ltd. + */ + +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <pthread.h> +#include <stdint.h> +#include <sys/shm.h> +#include "futextest.h" +#include "futex2test.h" +#include "logging.h" + +#define TEST_NAME "futex-wait" +#define WAKE_WAIT_US 10000 +#define NR_FUTEXES 30 +static struct futex_waitv waitv[NR_FUTEXES]; +u_int32_t futexes[NR_FUTEXES] = {0}; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *waiterfn(void *arg) +{ + struct timespec to; + int res; + + /* setting absolute timeout for futex2 */ + if (clock_gettime(CLOCK_MONOTONIC, &to)) + error("gettime64 failed\n", errno); + + to.tv_sec++; + + res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC); + if (res < 0) { + ksft_test_result_fail("futex_waitv returned: %d %s\n", + errno, strerror(errno)); + } else if (res != NR_FUTEXES - 1) { + ksft_test_result_fail("futex_waitv returned: %d, expecting %d\n", + res, NR_FUTEXES - 1); + } + + return NULL; +} + +int main(int argc, char *argv[]) +{ + pthread_t waiter; + int res, ret = RET_PASS; + struct timespec to; + int c, i; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(7); + ksft_print_msg("%s: Test FUTEX_WAITV\n", + basename(argv[0])); + + for (i = 0; i < NR_FUTEXES; i++) { + waitv[i].uaddr = (uintptr_t)&futexes[i]; + waitv[i].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG; + waitv[i].val = 0; + waitv[i].__reserved = 0; + } + + /* Private waitv */ + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, FUTEX_PRIVATE_FLAG); + if (res != 1) { + ksft_test_result_fail("futex_wake private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv private\n"); + } + + /* Shared waitv */ + for (i = 0; i < NR_FUTEXES; i++) { + int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); + + if (shm_id < 0) { + perror("shmget"); + exit(1); + } + + unsigned int *shared_data = shmat(shm_id, NULL, 0); + + *shared_data = 0; + waitv[i].uaddr = (uintptr_t)shared_data; + waitv[i].flags = FUTEX_32; + waitv[i].val = 0; + waitv[i].__reserved = 0; + } + + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, 0); + if (res != 1) { + ksft_test_result_fail("futex_wake shared returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv shared\n"); + } + + for (i = 0; i < NR_FUTEXES; i++) + shmdt(u64_to_ptr(waitv[i].uaddr)); + + /* Testing a waiter without FUTEX_32 flag */ + waitv[0].flags = FUTEX_PRIVATE_FLAG; + + if (clock_gettime(CLOCK_MONOTONIC, &to)) + error("gettime64 failed\n", errno); + + to.tv_sec++; + + res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC); + if (res == EINVAL) { + ksft_test_result_fail("futex_waitv private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv without FUTEX_32\n"); + } + + /* Testing a waiter with an unaligned address */ + waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32; + waitv[0].uaddr = 1; + + if (clock_gettime(CLOCK_MONOTONIC, &to)) + error("gettime64 failed\n", errno); + + to.tv_sec++; + + res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC); + if (res == EINVAL) { + ksft_test_result_fail("futex_wake private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv with an unaligned address\n"); + } + + /* Testing a NULL address for waiters.uaddr */ + waitv[0].uaddr = 0x00000000; + + if (clock_gettime(CLOCK_MONOTONIC, &to)) + error("gettime64 failed\n", errno); + + to.tv_sec++; + + res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC); + if (res == EINVAL) { + ksft_test_result_fail("futex_waitv private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n"); + } + + /* Testing a NULL address for *waiters */ + if (clock_gettime(CLOCK_MONOTONIC, &to)) + error("gettime64 failed\n", errno); + + to.tv_sec++; + + res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC); + if (res == EINVAL) { + ksft_test_result_fail("futex_waitv private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv NULL address in *waiters\n"); + } + + /* Testing an invalid clockid */ + if (clock_gettime(CLOCK_MONOTONIC, &to)) + error("gettime64 failed\n", errno); + + to.tv_sec++; + + res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_TAI); + if (res == EINVAL) { + ksft_test_result_fail("futex_waitv private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_waitv invalid clockid\n"); + } + + ksft_print_cnts(); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 11a9d62290f5..5ccd599da6c3 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -79,3 +79,6 @@ echo echo ./futex_requeue $COLOR + +echo +./futex_waitv $COLOR diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h new file mode 100644 index 000000000000..9d305520e849 --- /dev/null +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Futex2 library addons for futex tests + * + * Copyright 2021 Collabora Ltd. + */ +#include <stdint.h> + +#define u64_to_ptr(x) ((void *)(uintptr_t)(x)) + +/** + * futex_waitv - Wait at multiple futexes, wake on any + * @waiters: Array of waiters + * @nr_waiters: Length of waiters array + * @flags: Operation flags + * @timo: Optional timeout for operation + */ +static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, + unsigned long flags, struct timespec *timo, clockid_t clockid) +{ + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); +} diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index cc9c846585f0..a9ba782d8ca0 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -33,9 +33,9 @@ tap_timeout() { # Make sure tests will time out if utility is available. if [ -x /usr/bin/timeout ] ; then - /usr/bin/timeout --foreground "$kselftest_timeout" "$1" + /usr/bin/timeout --foreground "$kselftest_timeout" $1 else - "$1" + $1 fi } @@ -65,17 +65,25 @@ run_one() TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" - if [ ! -x "$TEST" ]; then - echo -n "# Warning: file $TEST is " - if [ ! -e "$TEST" ]; then - echo "missing!" - else - echo "not executable, correct this." - fi + if [ ! -e "$TEST" ]; then + echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else + cmd="./$BASENAME_TEST" + if [ ! -x "$TEST" ]; then + echo "# Warning: file $TEST is not executable" + + if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] + then + interpreter=$(head -n 1 "$TEST" | cut -c 3-) + cmd="$interpreter ./$BASENAME_TEST" + else + echo "not ok $test_num $TEST_HDR_MSG" + return + fi + fi cd `dirname $TEST` > /dev/null - ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | + ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index 8039e1eff938..9f55ccd169a1 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -84,7 +84,7 @@ int get_warnings_count(void) f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); if (fscanf(f, "%d", &warnings) < 1) warnings = 0; - fclose(f); + pclose(f); return warnings; } diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 38edea25631b..a26a3fa9e925 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -8,3 +8,4 @@ CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_UBSAN_BOUNDS=y CONFIG_UBSAN_TRAP=y +CONFIG_STACKPROTECTOR_STRONG=y diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index e95e79bd3126..95e904959207 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -56,8 +56,14 @@ if echo "$test" | grep -q '^#' ; then fi # If no expected output given, assume an Oops with back trace is success. +repeat=1 if [ -z "$expect" ]; then expect="call trace:" +else + if echo "$expect" | grep -q '^repeat:' ; then + repeat=$(echo "$expect" | cut -d' ' -f1 | cut -d: -f2) + expect=$(echo "$expect" | cut -d' ' -f2-) + fi fi # Prepare log for report checking @@ -83,7 +89,9 @@ dmesg > "$DMESG" # the signal that killed the subprocess, we must ignore the failure and # continue. However we don't silence stderr since there might be other # useful details reported there in the case of other unexpected conditions. -echo "$test" | cat >"$TRIGGER" || true +for i in $(seq 1 $repeat); do + echo "$test" | cat >"$TRIGGER" || true +done # Record and dump the results dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 09f7bfa383cc..6b36b7f5dcf9 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -12,6 +12,7 @@ CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_TRAILING +REPORT_STACK_CANARY repeat:2 ok: stack canaries differ UNSET_SMEP pinned CR4 bits changed: DOUBLE_FAULT CORRUPT_PAC diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config index a7e8cd5bb265..1eef042a31e1 100644 --- a/tools/testing/selftests/memory-hotplug/config +++ b/tools/testing/selftests/memory-hotplug/config @@ -1,5 +1,4 @@ CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_NOTIFIER_ERROR_INJECTION=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_MEMORY_HOTREMOVE=y diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 19deb9cdf72f..7581a7348e1b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -31,3 +31,8 @@ rxtimestamp timestamping txtimestamp so_netns_cookie +test_unix_oob +gro +ioam6_parser +toeplitz +cmsg_so_mark diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 492b273743b4..aee76d1bb9da 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,12 +22,14 @@ TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh +TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += cmsg_so_mark.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -44,6 +46,7 @@ TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls TEST_GEN_FILES += toeplitz +TEST_GEN_FILES += cmsg_so_mark TEST_FILES := settings diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh new file mode 100644 index 000000000000..75528788cb95 --- /dev/null +++ b/tools/testing/selftests/net/amt.sh @@ -0,0 +1,284 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Author: Taehee Yoo <ap420073@gmail.com> +# +# This script evaluates the AMT driver. +# There are four network-namespaces, LISTENER, SOURCE, GATEWAY, RELAY. +# The role of LISTENER is to listen multicast traffic. +# In order to do that, it send IGMP group join message. +# The role of SOURCE is to send multicast traffic to listener. +# The role of GATEWAY is to work Gateway role of AMT interface. +# The role of RELAY is to work Relay role of AMT interface. +# +# +# +------------------------+ +# | LISTENER netns | +# | | +# | +------------------+ | +# | | l_gw | | +# | | 192.168.0.2/24 | | +# | | 2001:db8::2/64 | | +# | +------------------+ | +# | . | +# +------------------------+ +# . +# . +# +-----------------------------------------------------+ +# | . GATEWAY netns | +# | . | +# |+---------------------------------------------------+| +# || . br0 || +# || +------------------+ +------------------+ || +# || | gw_l | | amtg | || +# || | 192.168.0.1/24 | +--------+---------+ || +# || | 2001:db8::1/64 | | || +# || +------------------+ | || +# |+-------------------------------------|-------------+| +# | | | +# | +--------+---------+ | +# | | gw_relay | | +# | | 10.0.0.1/24 | | +# | +------------------+ | +# | . | +# +-----------------------------------------------------+ +# . +# . +# +-----------------------------------------------------+ +# | RELAY netns . | +# | +------------------+ | +# | | relay_gw | | +# | | 10.0.0.2/24 | | +# | +--------+---------+ | +# | | | +# | | | +# | +------------------+ +--------+---------+ | +# | | relay_src | | amtr | | +# | | 172.17.0.1/24 | +------------------+ | +# | | 2001:db8:3::1/64 | | +# | +------------------+ | +# | . | +# | . | +# +-----------------------------------------------------+ +# . +# . +# +------------------------+ +# | . | +# | +------------------+ | +# | | src_relay | | +# | | 172.17.0.2/24 | | +# | | 2001:db8:3::2/64 | | +# | +------------------+ | +# | SOURCE netns | +# +------------------------+ +#============================================================================== + +readonly LISTENER=$(mktemp -u listener-XXXXXXXX) +readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX) +readonly RELAY=$(mktemp -u relay-XXXXXXXX) +readonly SOURCE=$(mktemp -u source-XXXXXXXX) +ERR=4 +err=0 + +exit_cleanup() +{ + for ns in "$@"; do + ip netns delete "${ns}" 2>/dev/null || true + done + + exit $ERR +} + +create_namespaces() +{ + ip netns add "${LISTENER}" || exit_cleanup + ip netns add "${GATEWAY}" || exit_cleanup "${LISTENER}" + ip netns add "${RELAY}" || exit_cleanup "${LISTENER}" "${GATEWAY}" + ip netns add "${SOURCE}" || exit_cleanup "${LISTENER}" "${GATEWAY}" \ + "${RELAY}" +} + +# The trap function handler +# +exit_cleanup_all() +{ + exit_cleanup "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}" +} + +setup_interface() +{ + for ns in "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"; do + ip -netns "${ns}" link set dev lo up + done; + + ip link add l_gw type veth peer name gw_l + ip link add gw_relay type veth peer name relay_gw + ip link add relay_src type veth peer name src_relay + + ip link set l_gw netns "${LISTENER}" up + ip link set gw_l netns "${GATEWAY}" up + ip link set gw_relay netns "${GATEWAY}" up + ip link set relay_gw netns "${RELAY}" up + ip link set relay_src netns "${RELAY}" up + ip link set src_relay netns "${SOURCE}" up mtu 1400 + + ip netns exec "${LISTENER}" ip a a 192.168.0.2/24 dev l_gw + ip netns exec "${LISTENER}" ip r a default via 192.168.0.1 dev l_gw + ip netns exec "${LISTENER}" ip a a 2001:db8::2/64 dev l_gw + ip netns exec "${LISTENER}" ip r a default via 2001:db8::1 dev l_gw + ip netns exec "${LISTENER}" ip a a 239.0.0.1/32 dev l_gw autojoin + ip netns exec "${LISTENER}" ip a a ff0e::5:6/128 dev l_gw autojoin + + ip netns exec "${GATEWAY}" ip a a 192.168.0.1/24 dev gw_l + ip netns exec "${GATEWAY}" ip a a 2001:db8::1/64 dev gw_l + ip netns exec "${GATEWAY}" ip a a 10.0.0.1/24 dev gw_relay + ip netns exec "${GATEWAY}" ip link add br0 type bridge + ip netns exec "${GATEWAY}" ip link set br0 up + ip netns exec "${GATEWAY}" ip link set gw_l master br0 + ip netns exec "${GATEWAY}" ip link set gw_l up + ip netns exec "${GATEWAY}" ip link add amtg master br0 type amt \ + mode gateway local 10.0.0.1 discovery 10.0.0.2 dev gw_relay \ + gateway_port 2268 relay_port 2268 + ip netns exec "${RELAY}" ip a a 10.0.0.2/24 dev relay_gw + ip netns exec "${RELAY}" ip link add amtr type amt mode relay \ + local 10.0.0.2 dev relay_gw relay_port 2268 max_tunnels 4 + ip netns exec "${RELAY}" ip a a 172.17.0.1/24 dev relay_src + ip netns exec "${RELAY}" ip a a 2001:db8:3::1/64 dev relay_src + ip netns exec "${SOURCE}" ip a a 172.17.0.2/24 dev src_relay + ip netns exec "${SOURCE}" ip a a 2001:db8:3::2/64 dev src_relay + ip netns exec "${SOURCE}" ip r a default via 172.17.0.1 dev src_relay + ip netns exec "${SOURCE}" ip r a default via 2001:db8:3::1 dev src_relay + ip netns exec "${RELAY}" ip link set amtr up + ip netns exec "${GATEWAY}" ip link set amtg up +} + +setup_sysctl() +{ + ip netns exec "${RELAY}" sysctl net.ipv4.ip_forward=1 -w -q +} + +setup_iptables() +{ + ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \ + -d 239.0.0.1 -j TTL --ttl-set 2 + ip netns exec "${RELAY}" ip6tables -t mangle -I PREROUTING \ + -j HL --hl-set 2 +} + +setup_mcast_routing() +{ + ip netns exec "${RELAY}" smcrouted + ip netns exec "${RELAY}" smcroutectl a relay_src \ + 172.17.0.2 239.0.0.1 amtr + ip netns exec "${RELAY}" smcroutectl a relay_src \ + 2001:db8:3::2 ff0e::5:6 amtr +} + +test_remote_ip() +{ + REMOTE=$(ip netns exec "${GATEWAY}" \ + ip -d -j link show amtg | jq .[0].linkinfo.info_data.remote) + if [ $REMOTE == "\"10.0.0.2\"" ]; then + printf "TEST: %-60s [ OK ]\n" "amt discovery" + else + printf "TEST: %-60s [FAIL]\n" "amt discovery" + ERR=1 + fi +} + +send_mcast_torture4() +{ + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' +} + + +send_mcast_torture6() +{ + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' +} + +check_features() +{ + ip link help 2>&1 | grep -q amt + if [ $? -ne 0 ]; then + echo "Missing amt support in iproute2" >&2 + exit_cleanup + fi +} + +test_ipv4_forward() +{ + RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000) + if [ "$RESULT4" == "172.17.0.2" ]; then + printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding" + exit 0 + else + printf "TEST: %-60s [FAIL]\n" "IPv4 amt multicast forwarding" + exit 1 + fi +} + +test_ipv6_forward() +{ + RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000) + if [ "$RESULT6" == "2001:db8:3::2" ]; then + printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding" + exit 0 + else + printf "TEST: %-60s [FAIL]\n" "IPv6 amt multicast forwarding" + exit 1 + fi +} + +send_mcast4() +{ + sleep 2 + ip netns exec "${SOURCE}" bash -c \ + 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & +} + +send_mcast6() +{ + sleep 2 + ip netns exec "${SOURCE}" bash -c \ + 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & +} + +check_features + +create_namespaces + +set -e +trap exit_cleanup_all EXIT + +setup_interface +setup_sysctl +setup_iptables +setup_mcast_routing +test_remote_ip +test_ipv4_forward & +pid=$! +send_mcast4 +wait $pid || err=$? +if [ $err -eq 1 ]; then + ERR=1 +fi +test_ipv6_forward & +pid=$! +send_mcast6 +wait $pid || err=$? +if [ $err -eq 1 ]; then + ERR=1 +fi +send_mcast_torture4 +printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +send_mcast_torture6 +printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +sleep 5 +if [ "${ERR}" -eq 1 ]; then + echo "Some tests failed." >&2 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh new file mode 100755 index 000000000000..b5af08af8559 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1} +# +# Create a veth pair and set IPs/routes on both. Then ping to establish +# an entry in the ARP/ND table. Depending on the test set sysctl option to +# 1 or 0. Set remote veth down which will cause local veth to go into a no +# carrier state. Depending on the test check the ARP/ND table: +# +# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier +# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry +# + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +readonly V4_ADDR0=10.0.10.1 +readonly V4_ADDR1=10.0.10.2 +readonly V6_ADDR0=2001:db8:91::1 +readonly V6_ADDR1=2001:db8:91::2 +nsid=100 + +cleanup_v6() +{ + ip netns del me + ip netns del peer + + sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 +} + +create_ns() +{ + local n=${1} + + ip netns del ${n} 2>/dev/null + + ip netns add ${n} + ip netns set ${n} $((nsid++)) + ip -netns ${n} link set lo up +} + + +setup_v6() { + create_ns me + create_ns peer + + IP="ip -netns me" + + $IP li add veth1 type veth peer name veth2 + $IP li set veth1 up + $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad + $IP li set veth2 netns peer up + ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad + + ip netns exec me sysctl -w $1 >/dev/null 2>&1 + + # Establish an ND cache entry + ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 + # Should have the veth1 entry in ND table + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + cleanup_v6 + echo "failed" + exit + fi + + # Set veth2 down, which will put veth1 in NOCARRIER state + ip netns exec peer ip link set veth2 down +} + +setup_v4() { + ip netns add "${PEER_NS}" + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 up + ip link set dev veth1 netns "${PEER_NS}" + ip netns exec "${PEER_NS}" ip link set dev veth1 up + ip addr add $V4_ADDR0/24 dev veth0 + ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1 + ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1 + ip route add default via $V4_ADDR0 dev veth0 + + sysctl -w "$1" >/dev/null 2>&1 + + # Establish an ARP cache entry + ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1 + # Should have the veth1 entry in ARP table + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + cleanup_v4 + echo "failed" + exit + fi + + # Set veth1 down, which will put veth0 in NOCARRIER state + ip netns exec "${PEER_NS}" ip link set veth1 down +} + +cleanup_v4() { + ip neigh flush dev veth0 + ip link del veth0 + local -r ns="$(ip netns list|grep $PEER_NS)" + [ -n "$ns" ] && ip netns del $ns 2>/dev/null + + sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1 +} + +# Run test when arp_evict_nocarrier = 1 (default). +run_arp_evict_nocarrier_enabled() { + echo "run arp_evict_nocarrier=1 test" + setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1" + + # ARP table should be empty + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "failed" + else + echo "ok" + fi + + cleanup_v4 +} + +# Run test when arp_evict_nocarrier = 0 +run_arp_evict_nocarrier_disabled() { + echo "run arp_evict_nocarrier=0 test" + setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0" + + # ARP table should still contain the entry + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v4 +} + +run_arp_evict_nocarrier_disabled_all() { + echo "run all.arp_evict_nocarrier=0 test" + setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0" + + # ARP table should still contain the entry + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v4 +} + +run_ndisc_evict_nocarrier_enabled() { + echo "run ndisc_evict_nocarrier=1 test" + + setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1" + + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "failed" + else + echo "ok" + fi + + cleanup_v6 +} + +run_ndisc_evict_nocarrier_disabled() { + echo "run ndisc_evict_nocarrier=0 test" + + setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0" + + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v6 +} + +run_ndisc_evict_nocarrier_disabled_all() { + echo "run all.ndisc_evict_nocarrier=0 test" + + setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0" + + ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v6 +} + +run_all_tests() { + run_arp_evict_nocarrier_enabled + run_arp_evict_nocarrier_disabled + run_arp_evict_nocarrier_disabled_all + run_ndisc_evict_nocarrier_enabled + run_ndisc_evict_nocarrier_disabled + run_ndisc_evict_nocarrier_disabled_all +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +run_all_tests diff --git a/tools/testing/selftests/net/cmsg_so_mark.c b/tools/testing/selftests/net/cmsg_so_mark.c new file mode 100644 index 000000000000..27f2804892a7 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_mark.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <errno.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> + +int main(int argc, const char **argv) +{ + char cbuf[CMSG_SPACE(sizeof(__u32))]; + struct addrinfo hints, *ai; + struct cmsghdr *cmsg; + struct iovec iov[1]; + struct msghdr msg; + int mark; + int err; + int fd; + + if (argc != 4) { + fprintf(stderr, "Usage: %s <dst_ip> <port> <mark>\n", argv[0]); + return 1; + } + mark = atoi(argv[3]); + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + + ai = NULL; + err = getaddrinfo(argv[1], argv[2], &hints, &ai); + if (err) { + fprintf(stderr, "Can't resolve address: %s\n", strerror(errno)); + return 1; + } + + fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP); + if (fd < 0) { + fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); + freeaddrinfo(ai); + return 1; + } + + iov[0].iov_base = "bla"; + iov[0].iov_len = 4; + + msg.msg_name = ai->ai_addr; + msg.msg_namelen = ai->ai_addrlen; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_MARK; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *(__u32 *)CMSG_DATA(cmsg) = mark; + + err = sendmsg(fd, &msg, 0); + + close(fd); + freeaddrinfo(ai); + return err != 4; +} diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh new file mode 100755 index 000000000000..19c6aab8d0e9 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_mark.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS=ns +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +MARK=1000 + +cleanup() +{ + ip netns del $NS +} + +trap cleanup EXIT + +# Namespaces +ip netns add $NS + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +ip -netns $NS rule add fwmark $MARK lookup 300 +ip -6 -netns $NS rule add fwmark $MARK lookup 300 +ip -netns $NS route add prohibit any table 300 +ip -6 -netns $NS route add prohibit any table 300 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $((MARK + 1)) +check_result $? 0 "IPv4 pass" +ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $((MARK + 1)) +check_result $? 0 "IPv6 pass" + +ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $MARK +check_result $? 1 "IPv4 rejection" +ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $MARK +check_result $? 1 "IPv6 rejection" + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 21b646d10b88..ead7963b9bf0 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,3 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_CRYPTO_SM4=y +CONFIG_AMT=m diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 13350cd5c8ac..3313566ce906 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -289,6 +289,12 @@ set_sysctl() run_cmd sysctl -q -w $* } +# get sysctl values in NS-A +get_sysctl() +{ + ${NSA_CMD} sysctl -n $* +} + ################################################################################ # Setup for tests @@ -439,10 +445,13 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} + ip netns pids ${NSA} | xargs kill 2>/dev/null ip netns del ${NSA} fi + ip netns pids ${NSB} | xargs kill 2>/dev/null ip netns del ${NSB} + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } @@ -1003,6 +1012,60 @@ ipv4_tcp_md5() run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" + test_ipv4_md5_vrf__vrf_server__no_bind_ifindex + test_ipv4_md5_vrf__global_server__bind_ifindex0 +} + +test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() +{ + log_start + show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" + + log_start + show_hint "Binding both the socket and the key is not required but it works" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" +} + +test_ipv4_md5_vrf__global_server__bind_ifindex0() +{ + # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections + local old_tcp_l3mdev_accept + old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept) + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" + log_start + + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" + + # restore value + set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept" } ipv4_tcp_novrf() diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 0d293391e9a4..b5a69ad191b0 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -2078,6 +2078,7 @@ basic_res() "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" log_test $? 0 "Dump all nexthop buckets in a group" + sleep 0.1 (( $($IP -j nexthop bucket list id 101 | jq '[.[] | select(.bucket.idle_time > 0 and .bucket.idle_time < 2)] | length') == 4 )) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d97bd6889446..72ee644d47bf 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -9,6 +9,7 @@ TEST_PROGS = bridge_igmp.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath.sh \ + ip6_forward_instats_vrf.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 675eff45b037..1162836f8f32 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -482,10 +482,15 @@ v3exc_timeout_test() local X=("192.0.2.20" "192.0.2.30") # GMI should be 3 seconds - ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + ip link set dev br0 type bridge mcast_query_interval 100 \ + mcast_query_response_interval 100 \ + mcast_membership_interval 300 v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP - ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + ip link set dev br0 type bridge mcast_query_interval 500 \ + mcast_query_response_interval 500 \ + mcast_membership_interval 1500 + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q sleep 3 bridge -j -d -s mdb show dev br0 \ @@ -517,7 +522,8 @@ v3exc_timeout_test() log_test "IGMPv3 group $TEST_GROUP exclude timeout" ip link set dev br0 type bridge mcast_query_interval 12500 \ - mcast_query_response_interval 1000 + mcast_query_response_interval 1000 \ + mcast_membership_interval 26000 v3cleanup $swp1 $TEST_GROUP } diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index ffdcfa87ca2b..e2b9ff773c6b 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -479,10 +479,15 @@ mldv2exc_timeout_test() local X=("2001:db8:1::20" "2001:db8:1::30") # GMI should be 3 seconds - ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + ip link set dev br0 type bridge mcast_query_interval 100 \ + mcast_query_response_interval 100 \ + mcast_membership_interval 300 mldv2exclude_prepare $h1 - ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + ip link set dev br0 type bridge mcast_query_interval 500 \ + mcast_query_response_interval 500 \ + mcast_membership_interval 1500 + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q sleep 3 bridge -j -d -s mdb show dev br0 \ @@ -514,7 +519,8 @@ mldv2exc_timeout_test() log_test "MLDv2 group $TEST_GROUP exclude timeout" ip link set dev br0 type bridge mcast_query_interval 12500 \ - mcast_query_response_interval 1000 + mcast_query_response_interval 1000 \ + mcast_membership_interval 26000 mldv2cleanup $swp1 } diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 2c14a86adaaa..de9944d42027 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -563,12 +563,6 @@ devlink_trap_group_policer_get() | jq '.[][][]["policer"]' } -devlink_trap_policer_ids_get() -{ - devlink -j -p trap policer show \ - | jq '.[]["'$DEVLINK_DEV'"][]["policer"]' -} - devlink_port_by_netdev() { local if_name=$1 diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b802c14d2950..bf17e485684f 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -39,3 +39,9 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process +# filter by software/hardware +TC_FLAG=skip_hw +# IPv6 traceroute utility name. +TROUTE6=traceroute6 + diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh new file mode 100755 index 000000000000..9f5b3e2e5e95 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test ipv6 stats on the incoming if when forwarding with VRF + +ALL_TESTS=" + ipv6_ping + ipv6_in_too_big_err + ipv6_in_hdr_err + ipv6_in_addr_err + ipv6_in_discard +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:1:1::2/64 + ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1 + simple_if_fini $h1 2001:1:1::2/64 +} + +router_create() +{ + vrf_create router + __simple_if_init $rtr1 router 2001:1:1::1/64 + __simple_if_init $rtr2 router 2001:1:2::1/64 + mtu_set $rtr2 1280 +} + +router_destroy() +{ + mtu_restore $rtr2 + __simple_if_fini $rtr2 2001:1:2::1/64 + __simple_if_fini $rtr1 2001:1:1::1/64 + vrf_destroy router +} + +h2_create() +{ + simple_if_init $h2 2001:1:2::2/64 + ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + mtu_set $h2 1280 +} + +h2_destroy() +{ + mtu_restore $h2 + ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + simple_if_fini $h2 2001:1:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rtr1=${NETIFS[p2]} + + rtr2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + router_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + router_destroy + h1_destroy + vrf_cleanup +} + +ipv6_in_too_big_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + local vrf_name=$(master_name_get $h1) + + # Send too big packets + ip vrf exec $vrf_name \ + $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InTooBigErrors" +} + +ipv6_in_hdr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + local vrf_name=$(master_name_get $h1) + + # Send packets with hop limit 1, easiest with traceroute6 as some ping6 + # doesn't allow hop limit to be specified + ip vrf exec $vrf_name \ + $TROUTE6 2001:1:2::2 &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InHdrErrors" +} + +ipv6_in_addr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + local vrf_name=$(master_name_get $h1) + + # Disable forwarding temporary while sending the packet + sysctl -qw net.ipv6.conf.all.forwarding=0 + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + sysctl -qw net.ipv6.conf.all.forwarding=1 + + local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InAddrErrors" +} + +ipv6_in_discard() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards) + local vrf_name=$(master_name_get $h1) + + # Add a policy to discard + ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + ip xfrm policy del dst 2001:1:2::2/128 dir fwd + + local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InDiscards" +} +ipv6_ping() +{ + RET=0 + + ping6_test $h1 2001:1:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh new file mode 100755 index 000000000000..96c97064f2d3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel without key. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 + sw2_flat_create $ol2 $ul2 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6" +} + +gre_mtu_change() +{ + test_mtu_change +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh new file mode 100755 index 000000000000..ff9fb0db9bd1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 key 233 + sw2_flat_create $ol2 $ul2 key 233 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key" +} + +gre_mtu_change() +{ + test_mtu_change +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh new file mode 100755 index 000000000000..12c138785242 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with keys. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 ikey 111 okey 222 + sw2_flat_create $ol2 $ul2 ikey 222 okey 111 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh new file mode 100755 index 000000000000..83b55c30a5c3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 + sw2_hierarchical_create $ol2 $ul2 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh new file mode 100755 index 000000000000..256607916d92 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 key 22 + sw2_hierarchical_create $ol2 $ul2 key 22 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh new file mode 100755 index 000000000000..ad1bcd6334a8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 ikey 111 okey 222 + sw2_hierarchical_create $ol2 $ul2 ikey 222 okey 111 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh new file mode 100644 index 000000000000..58a3597037b1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -0,0 +1,438 @@ +# SPDX-License-Identifier: GPL-2.0 +#!/bin/bash + +# Handles creation and destruction of IP-in-IP or GRE tunnels over the given +# topology. Supports both flat and hierarchical models. +# +# Flat Model: +# Overlay and underlay share the same VRF. +# SW1 uses default VRF so tunnel has no bound dev. +# SW2 uses non-default VRF tunnel has a bound dev. +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.1/24 | | +# | 2001:db8:1::1/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 --. | +# | tos=inherit | | +# | . | +# | .--------------------- | +# | | | +# | v | +# | + $ul1.111 (vlan) | +# | | 2001:db8:10::1/64 | +# | \ | +# | \____________ | +# | | | +# | VRF default + $ul1 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | SW2 | | +# | $ul2 + | +# | ___________| | +# | / | +# | / | +# | + $ul2.111 (vlan) | +# | ^ 2001:db8:10::2/64 | +# | | | +# | | | +# | '----------------------. | +# | + g2a (ip6gre) | | +# | loc=2001:db8:3::2 | | +# | rem=2001:db8:3::1 --' | +# | tos=inherit | +# | | +# | + $ol2 | +# | | 203.0.113.2/24 | +# | VRF v$ol2 | 2001:db8:2::2/64 | +# +---------------------|-----------------------+ +# +---------------------|----------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.1/24 | +# | 2001:db8:2::1/64 | +# +--------------------------------+ +# +# Hierarchical model: +# The tunnel is bound to a device in a different VRF +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.1/24 | | +# | 2001:db8:1::1/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | +-----------------------|-----------------+ | +# | | $ol1 + | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | | | +# | | + g1a (ip6gre) | | +# | | loc=2001:db8:3::1 | | +# | | rem=2001:db8:3::2 | | +# | | tos=inherit | | +# | | ^ | | +# | | VRF v$ol1 | | | +# | +--------------------|--------------------+ | +# | | | +# | +--------------------|--------------------+ | +# | | VRF v$ul1 | | | +# | | | | | +# | | v | | +# | | dummy1 + | | +# | | 2001:db8:3::1/64 | | +# | | .-----------' | | +# | | | | | +# | | v | | +# | | + $ul1.111 (vlan) | | +# | | | 2001:db8:10::1/64 | | +# | | \ | | +# | | \__________ | | +# | | | | | +# | | + $ul1 | | +# | +---------------------|-------------------+ | +# +-----------------------|---------------------+ +# | +# +-----------------------|---------------------+ +# | SW2 | | +# | +---------------------|-------------------+ | +# | | + $ul2 | | +# | | _____| | | +# | | / | | +# | | / | | +# | | | $ul2.111 (vlan) | | +# | | + 2001:db8:10::2/64 | | +# | | ^ | | +# | | | | | +# | | '------. | | +# | | dummy2 + | | +# | | 2001:db8:3::2/64 | | +# | | ^ | | +# | | | | | +# | | | | | +# | | VRF v$ul2 | | | +# | +---------------------|-------------------+ | +# | | | +# | +---------------------|-------------------+ | +# | | VRF v$ol2 | | | +# | | | | | +# | | v | | +# | | g2a (ip6gre) + | | +# | | loc=2001:db8:3::2 | | +# | | rem=2001:db8:3::1 | | +# | | tos=inherit | | +# | | | | +# | | $ol2 + | | +# | | 203.0.113.2/24 | | | +# | | 2001:db8:2::2/64 | | | +# | +---------------------|-------------------+ | +# +-----------------------|---------------------+ +# | +# +-----------------------|--------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.1/24 | +# | 2001:db8:2::1/64 | +# +--------------------------------+ + +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.1/24 2001:db8:1::1/64 + ip route add vrf v$h1 203.0.113.0/24 via 198.51.100.2 + ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 203.0.113.0/24 via 198.51.100.2 + simple_if_fini $h1 198.51.100.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.1/24 2001:db8:2::1/64 + ip route add vrf v$h2 198.51.100.0/24 via 203.0.113.2 + ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2 + ip route del vrf v$h2 198.51.100.0/24 via 203.0.113.2 + simple_if_fini $h2 203.0.113.1/24 2001:db8:2::1/64 +} + +sw1_flat_create() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip link set dev $ol1 up + __addr_add_del $ol1 add 198.51.100.2/24 2001:db8:1::2/64 + + ip link set dev $ul1 up + vlan_create $ul1 111 "" 2001:db8:10::1/64 + + tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit "$@" + ip link set dev g1a up + __addr_add_del g1a add "2001:db8:3::1/128" + + ip -6 route add 2001:db8:3::2/128 via 2001:db8:10::2 + ip route add 203.0.113.0/24 dev g1a + ip -6 route add 2001:db8:2::/64 dev g1a +} + +sw1_flat_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip -6 route del 2001:db8:2::/64 + ip route del 203.0.113.0/24 + ip -6 route del 2001:db8:3::2/128 via 2001:db8:10::2 + + __simple_if_fini g1a 2001:db8:3::1/128 + tunnel_destroy g1a + + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + __simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw2_flat_create() +{ + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 2001:db8:10::2/64 + + tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + ttl inherit dev v$ol2 "$@" + __simple_if_init g2a v$ol2 2001:db8:3::2/128 + + # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh" + ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2) + ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2) + + ip -6 route add vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1 + ip route add vrf v$ol2 198.51.100.0/24 dev g2a + ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a +} + +sw2_flat_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip -6 route del vrf v$ol2 2001:db8:2::/64 + ip route del vrf v$ol2 198.51.100.0/24 + ip -6 route del vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1 + + __simple_if_fini g2a 2001:db8:3::2/128 + tunnel_destroy g2a + + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 +} + +sw1_hierarchical_create() +{ + local ol1=$1; shift + local ul1=$1; shift + + simple_if_init $ol1 198.51.100.2/24 2001:db8:1::2/64 + simple_if_init $ul1 + ip link add name dummy1 type dummy + __simple_if_init dummy1 v$ul1 2001:db8:3::1/64 + + vlan_create $ul1 111 v$ul1 2001:db8:10::1/64 + tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit dev dummy1 "$@" + ip link set dev g1a master v$ol1 + + ip -6 route add vrf v$ul1 2001:db8:3::2/128 via 2001:db8:10::2 + ip route add vrf v$ol1 203.0.113.0/24 dev g1a + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1a +} + +sw1_hierarchical_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + ip -6 route del vrf v$ul1 2001:db8:3::2/128 + + tunnel_destroy g1a + vlan_destroy $ul1 111 + + __simple_if_fini dummy1 2001:db8:3::1/64 + ip link del dev dummy1 + + simple_if_fini $ul1 + simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw2_hierarchical_create() +{ + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64 + simple_if_init $ul2 + + ip link add name dummy2 type dummy + __simple_if_init dummy2 v$ul2 2001:db8:3::2/64 + + vlan_create $ul2 111 v$ul2 2001:db8:10::2/64 + tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + ttl inherit dev dummy2 "$@" + ip link set dev g2a master v$ol2 + + # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh" + ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2) + ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2) + + ip -6 route add vrf v$ul2 2001:db8:3::1/128 via 2001:db8:10::1 + ip route add vrf v$ol2 198.51.100.0/24 dev g2a + ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a +} + +sw2_hierarchical_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip -6 route del vrf v$ol2 2001:db8:2::/64 + ip route del vrf v$ol2 198.51.100.0/24 + ip -6 route del vrf v$ul2 2001:db8:3::1/128 + + tunnel_destroy g2a + vlan_destroy $ul2 111 + + __simple_if_fini dummy2 2001:db8:3::2/64 + ip link del dev dummy2 + + simple_if_fini $ul2 + simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 +} + +test_traffic_ip4ip6() +{ + RET=0 + + h1mac=$(mac_get $h1) + ol1mac=$(mac_get $ol1) + + tc qdisc add dev $ul1 clsact + tc filter add dev $ul1 egress proto all pref 1 handle 101 \ + flower $TC_FLAG action pass + + tc qdisc add dev $ol2 clsact + tc filter add dev $ol2 egress protocol ipv4 pref 1 handle 101 \ + flower $TC_FLAG dst_ip 203.0.113.1 action pass + + $MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \ + -B 203.0.113.1 -t ip -q -d 1msec + + # Check ports after encap and after decap. + tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 + check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG" + + tc_check_at_least_x_packets "dev $ol2 egress" 101 1000 + check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG" + + log_test "$@" + + tc filter del dev $ol2 egress protocol ipv4 pref 1 handle 101 flower + tc qdisc del dev $ol2 clsact + tc filter del dev $ul1 egress proto all pref 1 handle 101 flower + tc qdisc del dev $ul1 clsact +} + +test_traffic_ip6ip6() +{ + RET=0 + + h1mac=$(mac_get $h1) + ol1mac=$(mac_get $ol1) + + tc qdisc add dev $ul1 clsact + tc filter add dev $ul1 egress proto all pref 1 handle 101 \ + flower $TC_FLAG action pass + + tc qdisc add dev $ol2 clsact + tc filter add dev $ol2 egress protocol ipv6 pref 1 handle 101 \ + flower $TC_FLAG dst_ip 2001:db8:2::1 action pass + + $MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t ip -q -d 1msec + + # Check ports after encap and after decap. + tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 + check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG" + + tc_check_at_least_x_packets "dev $ol2 egress" 101 1000 + check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG" + + log_test "$@" + + tc filter del dev $ol2 egress protocol ipv6 pref 1 handle 101 flower + tc qdisc del dev $ol2 clsact + tc filter del dev $ul1 egress proto all pref 1 handle 101 flower + tc qdisc del dev $ul1 clsact +} + +topo_mtu_change() +{ + local mtu=$1 + + ip link set mtu $mtu dev $h1 + ip link set mtu $mtu dev $ol1 + ip link set mtu $mtu dev g1a + ip link set mtu $mtu dev $ul1 + ip link set mtu $mtu dev $ul1.111 + ip link set mtu $mtu dev $h2 + ip link set mtu $mtu dev $ol2 + ip link set mtu $mtu dev g2a + ip link set mtu $mtu dev $ul2 + ip link set mtu $mtu dev $ul2.111 +} + +test_mtu_change() +{ + RET=0 + + ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3" + check_fail $? "ping GRE IPv6 should not pass with packet size 1800" + + RET=0 + + topo_mtu_change 2000 + ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3" + check_err $? + log_test "ping GRE IPv6, packet size 1800 after MTU change" +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e7fc5c35b569..dfd827b7a9f9 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -23,6 +23,8 @@ MC_CLI=${MC_CLI:=smcroutectl} PING_TIMEOUT=${PING_TIMEOUT:=5} WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} +REQUIRE_JQ=${REQUIRE_JQ:=yes} +REQUIRE_MZ=${REQUIRE_MZ:=yes} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -141,8 +143,12 @@ require_command() fi } -require_command jq -require_command $MZ +if [[ "$REQUIRE_JQ" = "yes" ]]; then + require_command jq +fi +if [[ "$REQUIRE_MZ" = "yes" ]]; then + require_command $MZ +fi if [[ ! -v NUM_NETIFS ]]; then echo "SKIP: importer does not define \"NUM_NETIFS\"" @@ -280,6 +286,15 @@ log_test() return 0 } +log_test_skip() +{ + local test_name=$1 + local opt_str=$2 + + printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str" + return 0 +} + log_info() { local msg=$1 @@ -751,6 +766,14 @@ qdisc_parent_stats_get() | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" } +ipv6_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2 +} + humanize() { local speed=$1; shift diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index 8bd85da1905a..75a37c189ef3 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -4,9 +4,12 @@ ALL_TESTS=" ping_ipv4 tbf_test + tbf_root_test " source $lib_dir/sch_tbf_core.sh +QDISC_TYPE=${QDISC% *} + tbf_test_one() { local bs=$1; shift @@ -22,6 +25,8 @@ tbf_test_one() tbf_test() { + log_info "Testing root-$QDISC_TYPE-tbf" + # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 @@ -29,6 +34,29 @@ tbf_test() tc qdisc del dev $swp2 root } +tbf_root_test() +{ + local bs=128K + + log_info "Testing root-tbf-$QDISC_TYPE" + + tc qdisc replace dev $swp2 root handle 1: \ + tbf rate 400Mbit burst $bs limit 1M + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ + $QDISC 3 priomap 2 1 0 + tc qdisc replace dev $swp2 parent 10:3 handle 103: \ + bfifo limit 1M + tc qdisc replace dev $swp2 parent 10:2 handle 102: \ + bfifo limit 1M + tc qdisc replace dev $swp2 parent 10:1 handle 101: \ + bfifo limit 1M + + do_tbf_test 10 400 $bs + do_tbf_test 11 400 $bs + + tc qdisc del dev $swp2 root +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 0e18e8be6e2a..bce8bb8d2b6f 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -16,6 +16,16 @@ tc_check_packets() tc_rule_handle_stats_get "$id" "$handle" > /dev/null } +tc_check_at_least_x_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + + busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null +} + tc_check_packets_hitting() { local id=$1 diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 3caf72bb9c6a..a2b9fad5a9a6 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -6,7 +6,7 @@ # This script evaluates the IOAM insertion for IPv6 by checking the IOAM data # consistency directly inside packets on the receiver side. Tests are divided # into three categories: OUTPUT (evaluates the IOAM processing by the sender), -# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates +# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates # wider use cases that do not fall into the other two categories). Both OUTPUT # and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL # tests use the entire three-node topology (alpha, beta, gamma). Each test is @@ -200,7 +200,7 @@ check_kernel_compatibility() ip -netns ioam-tmp-node link set veth0 up ip -netns ioam-tmp-node link set veth1 up - ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null + ip -netns ioam-tmp-node ioam namespace add 0 ns_ad=$? ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0" @@ -214,11 +214,11 @@ check_kernel_compatibility() exit 1 fi - ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \ - type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ + trace prealloc type 0x800000 ns 0 size 4 dev veth0 tr_ad=$? - ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace" + ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6" tr_sh=$? if [[ $tr_ad != 0 || $tr_sh != 0 ]] @@ -232,6 +232,30 @@ check_kernel_compatibility() ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true + + lsmod | grep -q "ip6_tunnel" + ip6tnl_loaded=$? + + if [ $ip6tnl_loaded = 0 ] + then + encap_tests=0 + else + modprobe ip6_tunnel &>/dev/null + lsmod | grep -q "ip6_tunnel" + encap_tests=$? + + if [ $encap_tests != 0 ] + then + ip a | grep -q "ip6tnl0" + encap_tests=$? + + if [ $encap_tests != 0 ] + then + echo "Note: ip6_tunnel not found neither as a module nor inside the" \ + "kernel, tests that require it (encap mode) will be omitted" + fi + fi + fi } cleanup() @@ -242,6 +266,11 @@ cleanup() ip netns del ioam-node-alpha || true ip netns del ioam-node-beta || true ip netns del ioam-node-gamma || true + + if [ $ip6tnl_loaded != 0 ] + then + modprobe -r ip6_tunnel 2>/dev/null || true + fi } setup() @@ -329,6 +358,12 @@ log_test_failed() printf "TEST: %-60s [FAIL]\n" "${desc}" } +log_results() +{ + echo "- Tests passed: ${npassed}" + echo "- Tests failed: ${nfailed}" +} + run_test() { local name=$1 @@ -349,17 +384,27 @@ run_test() ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null if [ $? != 0 ] then + nfailed=$((nfailed+1)) log_test_failed "${desc}" kill -2 $spid &>/dev/null else wait $spid - [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" + if [ $? = 0 ] + then + npassed=$((npassed+1)) + log_test_passed "${desc}" + else + nfailed=$((nfailed+1)) + log_test_failed "${desc}" + fi fi } run() { echo + printf "%0.s-" {1..74} + echo echo "OUTPUT tests" printf "%0.s-" {1..74} echo @@ -369,7 +414,8 @@ run() for t in $TESTS_OUTPUT do - $t + $t "inline" + [ $encap_tests = 0 ] && $t "encap" done # clean OUTPUT settings @@ -378,6 +424,8 @@ run() echo + printf "%0.s-" {1..74} + echo echo "INPUT tests" printf "%0.s-" {1..74} echo @@ -387,7 +435,8 @@ run() for t in $TESTS_INPUT do - $t + $t "inline" + [ $encap_tests = 0 ] && $t "encap" done # clean INPUT settings @@ -396,7 +445,8 @@ run() ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} ip -netns ioam-node-alpha route change db01::/64 dev veth0 - + echo + printf "%0.s-" {1..74} echo echo "GLOBAL tests" printf "%0.s-" {1..74} @@ -404,8 +454,12 @@ run() for t in $TESTS_GLOBAL do - $t + $t "inline" + [ $encap_tests = 0 ] && $t "encap" done + + echo + log_results } bit2type=( @@ -431,11 +485,16 @@ out_undef_ns() ############################################################################## local desc="Unknown IOAM namespace" - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0x800000 ns 0 size 4 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0x800000 ns 0 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0x800000 0 - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0x800000 0 + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down } out_no_room() @@ -446,11 +505,16 @@ out_no_room() ############################################################################## local desc="Missing trace room" - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0xc00000 ns 123 size 4 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0xc00000 123 + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0xc00000 123 + + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down } out_bits() @@ -465,15 +529,36 @@ out_bits() local tmp=${bit2size[22]} bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + for i in {0..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ - prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 - - run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 ${bit2type[$i]} 123 + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + dev veth0 &>/dev/null + + local cmd_res=$? + local descr="${desc/<n>/$i}" + + if [[ $i -ge 12 && $i -le 21 ]] + then + if [ $cmd_res != 0 ] + then + npassed=$((npassed+1)) + log_test_passed "$descr" + else + nfailed=$((nfailed+1)) + log_test_failed "$descr" + fi + else + run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \ + ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + fi done + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + bit2size[22]=$tmp } @@ -485,11 +570,16 @@ out_full_supp_trace() ############################################################################## local desc="Full supported trace" - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0xfff002 ns 123 size 100 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xfff002 ns 123 size 100 dev veth0 - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0xfff002 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0xfff002 123 + + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down } @@ -510,11 +600,16 @@ in_undef_ns() ############################################################################## local desc="Unknown IOAM namespace" - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0x800000 ns 0 size 4 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0x800000 ns 0 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0x800000 0 - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0x800000 0 + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down } in_no_room() @@ -525,11 +620,16 @@ in_no_room() ############################################################################## local desc="Missing trace room" - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0xc00000 ns 123 size 4 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0xc00000 123 - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0xc00000 123 + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down } in_bits() @@ -544,15 +644,21 @@ in_bits() local tmp=${bit2size[22]} bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) - for i in {0..22} + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + + for i in {0..11} {22..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ - prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + dev veth0 - run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" ioam-node-alpha \ + ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 done + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + bit2size[22]=$tmp } @@ -569,11 +675,16 @@ in_oflag() # back the IOAM namespace that was previously configured on the sender. ip -netns ioam-node-alpha ioam namespace add 123 - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0xc00000 ns 123 size 4 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0xc00000 123 - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0xc00000 123 + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down # And we clean the exception for this test to get things back to normal for # other INPUT tests @@ -588,11 +699,16 @@ in_full_supp_trace() ############################################################################## local desc="Full supported trace" - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \ - type 0xfff002 ns 123 size 80 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \ - db01::1 veth0 0xfff002 123 + ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xfff002 ns 123 size 80 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 0xfff002 123 + + [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down } @@ -611,11 +727,16 @@ fwd_full_supp_trace() ############################################################################## local desc="Forward - Full supported trace" - ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \ - type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" + [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up + + ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \ + trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 - run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \ - db02::2 veth0 0xfff002 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \ + db01::2 db02::2 veth0 0xfff002 123 + + [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down } @@ -625,6 +746,9 @@ fwd_full_supp_trace() # # ################################################################################ +npassed=0 +nfailed=0 + if [ "$(id -u)" -ne 0 ] then echo "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d376cb2c383c..8f6997d35816 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -94,16 +94,6 @@ enum { TEST_OUT_BIT9, TEST_OUT_BIT10, TEST_OUT_BIT11, - TEST_OUT_BIT12, - TEST_OUT_BIT13, - TEST_OUT_BIT14, - TEST_OUT_BIT15, - TEST_OUT_BIT16, - TEST_OUT_BIT17, - TEST_OUT_BIT18, - TEST_OUT_BIT19, - TEST_OUT_BIT20, - TEST_OUT_BIT21, TEST_OUT_BIT22, TEST_OUT_FULL_SUPP_TRACE, @@ -125,16 +115,6 @@ enum { TEST_IN_BIT9, TEST_IN_BIT10, TEST_IN_BIT11, - TEST_IN_BIT12, - TEST_IN_BIT13, - TEST_IN_BIT14, - TEST_IN_BIT15, - TEST_IN_BIT16, - TEST_IN_BIT17, - TEST_IN_BIT18, - TEST_IN_BIT19, - TEST_IN_BIT20, - TEST_IN_BIT21, TEST_IN_BIT22, TEST_IN_FULL_SUPP_TRACE, @@ -199,30 +179,6 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, ioam6h->nodelen != 2 || ioam6h->remlen; - case TEST_OUT_BIT12: - case TEST_IN_BIT12: - case TEST_OUT_BIT13: - case TEST_IN_BIT13: - case TEST_OUT_BIT14: - case TEST_IN_BIT14: - case TEST_OUT_BIT15: - case TEST_IN_BIT15: - case TEST_OUT_BIT16: - case TEST_IN_BIT16: - case TEST_OUT_BIT17: - case TEST_IN_BIT17: - case TEST_OUT_BIT18: - case TEST_IN_BIT18: - case TEST_OUT_BIT19: - case TEST_IN_BIT19: - case TEST_OUT_BIT20: - case TEST_IN_BIT20: - case TEST_OUT_BIT21: - case TEST_IN_BIT21: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen != 1; - case TEST_OUT_BIT22: case TEST_IN_BIT22: return ioam6h->overflow || @@ -326,6 +282,66 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } + if (ioam6h->type.bit12) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit13) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit14) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit15) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit16) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit17) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit18) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit19) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit20) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit21) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + if (ioam6h->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; @@ -455,26 +471,6 @@ static int str2id(const char *tname) return TEST_OUT_BIT10; if (!strcmp("out_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit12", tname)) - return TEST_OUT_BIT12; - if (!strcmp("out_bit13", tname)) - return TEST_OUT_BIT13; - if (!strcmp("out_bit14", tname)) - return TEST_OUT_BIT14; - if (!strcmp("out_bit15", tname)) - return TEST_OUT_BIT15; - if (!strcmp("out_bit16", tname)) - return TEST_OUT_BIT16; - if (!strcmp("out_bit17", tname)) - return TEST_OUT_BIT17; - if (!strcmp("out_bit18", tname)) - return TEST_OUT_BIT18; - if (!strcmp("out_bit19", tname)) - return TEST_OUT_BIT19; - if (!strcmp("out_bit20", tname)) - return TEST_OUT_BIT20; - if (!strcmp("out_bit21", tname)) - return TEST_OUT_BIT21; if (!strcmp("out_bit22", tname)) return TEST_OUT_BIT22; if (!strcmp("out_full_supp_trace", tname)) @@ -509,26 +505,6 @@ static int str2id(const char *tname) return TEST_IN_BIT10; if (!strcmp("in_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit12", tname)) - return TEST_IN_BIT12; - if (!strcmp("in_bit13", tname)) - return TEST_IN_BIT13; - if (!strcmp("in_bit14", tname)) - return TEST_IN_BIT14; - if (!strcmp("in_bit15", tname)) - return TEST_IN_BIT15; - if (!strcmp("in_bit16", tname)) - return TEST_IN_BIT16; - if (!strcmp("in_bit17", tname)) - return TEST_IN_BIT17; - if (!strcmp("in_bit18", tname)) - return TEST_IN_BIT18; - if (!strcmp("in_bit19", tname)) - return TEST_IN_BIT19; - if (!strcmp("in_bit20", tname)) - return TEST_IN_BIT20; - if (!strcmp("in_bit21", tname)) - return TEST_IN_BIT21; if (!strcmp("in_bit22", tname)) return TEST_IN_BIT22; if (!strcmp("in_full_supp_trace", tname)) @@ -606,16 +582,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { [TEST_OUT_BIT9] = check_ioam_header_and_data, [TEST_OUT_BIT10] = check_ioam_header_and_data, [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT12] = check_ioam_header, - [TEST_OUT_BIT13] = check_ioam_header, - [TEST_OUT_BIT14] = check_ioam_header, - [TEST_OUT_BIT15] = check_ioam_header, - [TEST_OUT_BIT16] = check_ioam_header, - [TEST_OUT_BIT17] = check_ioam_header, - [TEST_OUT_BIT18] = check_ioam_header, - [TEST_OUT_BIT19] = check_ioam_header, - [TEST_OUT_BIT20] = check_ioam_header, - [TEST_OUT_BIT21] = check_ioam_header, [TEST_OUT_BIT22] = check_ioam_header_and_data, [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, [TEST_IN_UNDEF_NS] = check_ioam_header, @@ -633,16 +599,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { [TEST_IN_BIT9] = check_ioam_header_and_data, [TEST_IN_BIT10] = check_ioam_header_and_data, [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT12] = check_ioam_header, - [TEST_IN_BIT13] = check_ioam_header, - [TEST_IN_BIT14] = check_ioam_header, - [TEST_IN_BIT15] = check_ioam_header, - [TEST_IN_BIT16] = check_ioam_header, - [TEST_IN_BIT17] = check_ioam_header, - [TEST_IN_BIT18] = check_ioam_header, - [TEST_IN_BIT19] = check_ioam_header, - [TEST_IN_BIT20] = check_ioam_header, - [TEST_IN_BIT21] = check_ioam_header, [TEST_IN_BIT22] = check_ioam_header_and_data, [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 260336d5f0b1..7569d892967a 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_sockopt pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f1464f09b080..bbf4e448bad9 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -8,7 +8,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt TEST_FILES := settings diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 89c4753c2760..95e81d557b08 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -14,6 +14,7 @@ #include <strings.h> #include <signal.h> #include <unistd.h> +#include <time.h> #include <sys/poll.h> #include <sys/sendfile.h> @@ -64,6 +65,7 @@ static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; static bool cfg_remove; +static unsigned int cfg_time; static unsigned int cfg_do_w; static int cfg_wait; static uint32_t cfg_mark; @@ -78,9 +80,10 @@ static struct cfg_cmsg_types cfg_cmsg_types; static void die_usage(void) { fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" - "[-l] [-w sec] connect_address\n"); + "[-l] [-w sec] [-t num] [-T num] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); fprintf(stderr, "\t-p num -- use port num\n"); @@ -448,7 +451,7 @@ static void set_nonblock(int fd) fcntl(fd, F_SETFL, flags | O_NONBLOCK); } -static int copyfd_io_poll(int infd, int peerfd, int outfd) +static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) { struct pollfd fds = { .fd = peerfd, @@ -487,9 +490,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) */ fds.events &= ~POLLIN; - if ((fds.events & POLLOUT) == 0) + if ((fds.events & POLLOUT) == 0) { + *in_closed_after_out = true; /* and nothing more to send */ break; + } /* Else, still have data to transmit */ } else if (len < 0) { @@ -547,7 +552,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join || cfg_remove) + if (cfg_remove) usleep(cfg_wait); close(peerfd); @@ -646,7 +651,7 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size) + unsigned int size, bool *in_closed_after_out) { int err; @@ -664,13 +669,14 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, shutdown(peerfd, SHUT_WR); err = do_recvfile(peerfd, outfd); + *in_closed_after_out = true; } return err; } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size) + unsigned int size, bool *in_closed_after_out) { int err; @@ -685,6 +691,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; err = do_recvfile(peerfd, outfd); + *in_closed_after_out = true; } return err; @@ -692,27 +699,62 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, static int copyfd_io(int infd, int peerfd, int outfd) { + bool in_closed_after_out = false; + struct timespec start, end; int file_size; + int ret; + + if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) + xerror("can not fetch start time %d", errno); switch (cfg_mode) { case CFG_MODE_POLL: - return copyfd_io_poll(infd, peerfd, outfd); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); + break; + case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - return copyfd_io_mmap(infd, peerfd, outfd, file_size); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); + break; + case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - return copyfd_io_sendfile(infd, peerfd, outfd, file_size); + ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); + break; + + default: + fprintf(stderr, "Invalid mode %d\n", cfg_mode); + + die_usage(); + return 1; } - fprintf(stderr, "Invalid mode %d\n", cfg_mode); + if (ret) + return ret; - die_usage(); - return 1; + if (cfg_time) { + unsigned int delta_ms; + + if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) + xerror("can not fetch end time %d", errno); + delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; + if (delta_ms > cfg_time) { + xerror("transfer slower than expected! runtime %d ms, expected %d ms", + delta_ms, cfg_time); + } + + /* show the runtime only if this end shutdown(wr) before receiving the EOF, + * (that is, if this end got the longer runtime) + */ + if (in_closed_after_out) + fprintf(stderr, "%d", delta_ms); + } + + return 0; } static void check_sockaddr(int pf, struct sockaddr_storage *ss, @@ -1005,12 +1047,11 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) { + while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; - cfg_wait = 400000; break; case 'r': cfg_remove = true; @@ -1043,6 +1084,9 @@ static void parse_opts(int argc, char **argv) if (poll_timeout <= 0) poll_timeout = -1; break; + case 'T': + cfg_time = atoi(optarg); + break; case 'm': cfg_mode = parse_mode(optarg); break; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 255793c5ac4f..7ef639a9d4a6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -297,7 +297,7 @@ do_transfer() if [ "$test_link_fail" -eq 2 ];then timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \ + $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ ${local_addr} < "$sinfail" > "$sout" & else timeout ${timeout_test} \ @@ -945,12 +945,15 @@ subflows_tests() # subflow limited by client reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 0 + ip netns exec $ns2 ./pm_nl_ctl limits 0 0 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "single subflow, limited by client" 0 0 0 # subflow limited by server reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 0 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 @@ -973,7 +976,7 @@ subflows_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows" 2 2 2 - # multiple subflows limited by serverf + # multiple subflows limited by server reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c new file mode 100644 index 000000000000..417b11cafafe --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <string.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> + +static int pf = AF_INET; + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif + +#ifndef MPTCP_INFO +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; +}; + +struct mptcp_subflow_data { + __u32 size_subflow_data; /* size of this structure in userspace */ + __u32 num_subflows; /* must be 0, set by kernel */ + __u32 size_kernel; /* must be 0, set by kernel */ + __u32 size_user; /* size of one element in data[] */ +} __attribute__((aligned(8))); + +struct mptcp_subflow_addrs { + union { + __kernel_sa_family_t sa_family; + struct sockaddr sa_local; + struct sockaddr_in sin_local; + struct sockaddr_in6 sin6_local; + struct __kernel_sockaddr_storage ss_local; + }; + union { + struct sockaddr sa_remote; + struct sockaddr_in sin_remote; + struct sockaddr_in6 sin6_remote; + struct __kernel_sockaddr_storage ss_remote; + }; +}; + +#define MPTCP_INFO 1 +#define MPTCP_TCPINFO 2 +#define MPTCP_SUBFLOW_ADDRS 3 +#endif + +struct so_state { + struct mptcp_info mi; + uint64_t mptcpi_rcv_delta; + uint64_t tcpi_rcv_delta; +}; + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_sockopt [-6]\n"); + exit(r); +} + +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? service : "", errstr); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, IPPROTO_MPTCP); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) + xerror("could not create listen socket"); + + if (listen(sock, 20)) + die_perror("listen"); + + return sock; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) + continue; + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + die_perror("connect"); + } + + if (sock < 0) + xerror("could not create connect socket"); + + freeaddrinfo(addr); + return sock; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "h6")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case '6': + pf = AF_INET6; + break; + default: + die_usage(1); + break; + } + } +} + +static void do_getsockopt_bogus_sf_data(int fd, int optname) +{ + struct mptcp_subflow_data good_data; + struct bogus_data { + struct mptcp_subflow_data d; + char buf[2]; + } bd; + socklen_t olen, _olen; + int ret; + + memset(&bd, 0, sizeof(bd)); + memset(&good_data, 0, sizeof(good_data)); + + olen = sizeof(good_data); + good_data.size_subflow_data = olen; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* 0 size_subflow_data */ + assert(olen == sizeof(good_data)); + + bd.d = good_data; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret == 0); + assert(olen == sizeof(good_data)); + assert(bd.d.num_subflows == 1); + assert(bd.d.size_kernel > 0); + assert(bd.d.size_user == 0); + + bd.d = good_data; + _olen = rand() % olen; + olen = _olen; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* bogus olen */ + assert(olen == _olen); /* must be unchanged */ + + bd.d = good_data; + olen = sizeof(good_data); + bd.d.size_kernel = 1; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* size_kernel not 0 */ + + bd.d = good_data; + olen = sizeof(good_data); + bd.d.num_subflows = 1; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* num_subflows not 0 */ + + /* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */ + bd.d = good_data; + olen = sizeof(bd); + bd.d.size_subflow_data = sizeof(bd); + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret == 0); + + /* olen must be truncated to real data size filled by kernel: */ + assert(olen == sizeof(good_data)); + + assert(bd.d.size_subflow_data == sizeof(bd)); + + bd.d = good_data; + bd.d.size_subflow_data += 1; + bd.d.size_user = 1; + olen = bd.d.size_subflow_data + 1; + _olen = olen; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &_olen); + assert(ret == 0); + + /* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */ + assert(olen == _olen); + + assert(bd.d.size_subflow_data == sizeof(good_data) + 1); + assert(bd.buf[0] == 0); +} + +static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w) +{ + struct mptcp_info i; + socklen_t olen; + int ret; + + olen = sizeof(i); + ret = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &i, &olen); + + if (ret < 0) + die_perror("getsockopt MPTCP_INFO"); + + assert(olen == sizeof(i)); + + if (s->mi.mptcpi_write_seq == 0) + s->mi = i; + + assert(s->mi.mptcpi_write_seq + w == i.mptcpi_write_seq); + + s->mptcpi_rcv_delta = i.mptcpi_rcv_nxt - s->mi.mptcpi_rcv_nxt; +} + +static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t w) +{ + struct my_tcp_info { + struct mptcp_subflow_data d; + struct tcp_info ti[2]; + } ti; + int ret, tries = 5; + socklen_t olen; + + do { + memset(&ti, 0, sizeof(ti)); + + ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + ti.d.size_user = sizeof(struct tcp_info); + olen = sizeof(ti); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen); + if (ret < 0) + xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)"); + + assert(olen <= sizeof(ti)); + assert(ti.d.size_user == ti.d.size_kernel); + assert(ti.d.size_user == sizeof(struct tcp_info)); + assert(ti.d.num_subflows == 1); + + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); + olen -= sizeof(struct mptcp_subflow_data); + assert(olen == sizeof(struct tcp_info)); + + if (ti.ti[0].tcpi_bytes_sent == w && + ti.ti[0].tcpi_bytes_received == r) + goto done; + + if (r == 0 && ti.ti[0].tcpi_bytes_sent == w && + ti.ti[0].tcpi_bytes_received) { + s->tcpi_rcv_delta = ti.ti[0].tcpi_bytes_received; + goto done; + } + + /* wait and repeat, might be that tx is still ongoing */ + sleep(1); + } while (tries-- > 0); + + xerror("tcpi_bytes_sent %" PRIu64 ", want %zu. tcpi_bytes_received %" PRIu64 ", want %zu", + ti.ti[0].tcpi_bytes_sent, w, ti.ti[0].tcpi_bytes_received, r); + +done: + do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); +} + +static void do_getsockopt_subflow_addrs(int fd) +{ + struct sockaddr_storage remote, local; + socklen_t olen, rlen, llen; + int ret; + struct my_addrs { + struct mptcp_subflow_data d; + struct mptcp_subflow_addrs addr[2]; + } addrs; + + memset(&addrs, 0, sizeof(addrs)); + memset(&local, 0, sizeof(local)); + memset(&remote, 0, sizeof(remote)); + + addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + addrs.d.size_user = sizeof(struct mptcp_subflow_addrs); + olen = sizeof(addrs); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen); + if (ret < 0) + die_perror("getsockopt MPTCP_SUBFLOW_ADDRS"); + + assert(olen <= sizeof(addrs)); + assert(addrs.d.size_user == addrs.d.size_kernel); + assert(addrs.d.size_user == sizeof(struct mptcp_subflow_addrs)); + assert(addrs.d.num_subflows == 1); + + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); + olen -= sizeof(struct mptcp_subflow_data); + assert(olen == sizeof(struct mptcp_subflow_addrs)); + + llen = sizeof(local); + ret = getsockname(fd, (struct sockaddr *)&local, &llen); + if (ret < 0) + die_perror("getsockname"); + rlen = sizeof(remote); + ret = getpeername(fd, (struct sockaddr *)&remote, &rlen); + if (ret < 0) + die_perror("getpeername"); + + assert(rlen > 0); + assert(rlen == llen); + + assert(remote.ss_family == local.ss_family); + + assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0); + assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0); + + memset(&addrs, 0, sizeof(addrs)); + + addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + addrs.d.size_user = sizeof(sa_family_t); + olen = sizeof(addrs.d) + sizeof(sa_family_t); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen); + assert(ret == 0); + assert(olen == sizeof(addrs.d) + sizeof(sa_family_t)); + + assert(addrs.addr[0].sa_family == pf); + assert(addrs.addr[0].sa_family == local.ss_family); + + assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) != 0); + assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) != 0); + + do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); +} + +static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) +{ + do_getsockopt_mptcp_info(s, fd, w); + + do_getsockopt_tcp_info(s, fd, r, w); + + do_getsockopt_subflow_addrs(fd); +} + +static void connect_one_server(int fd, int pipefd) +{ + char buf[4096], buf2[4096]; + size_t len, i, total; + struct so_state s; + bool eof = false; + ssize_t ret; + + memset(&s, 0, sizeof(s)); + + len = rand() % (sizeof(buf) - 1); + + if (len < 128) + len = 128; + + for (i = 0; i < len ; i++) { + buf[i] = rand() % 26; + buf[i] += 'A'; + } + + buf[i] = '\n'; + + do_getsockopts(&s, fd, 0, 0); + + /* un-block server */ + ret = read(pipefd, buf2, 4); + assert(ret == 4); + close(pipefd); + + assert(strncmp(buf2, "xmit", 4) == 0); + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + + if (ret != (ssize_t)len) + xerror("short write"); + + total = 0; + do { + ret = read(fd, buf2 + total, sizeof(buf2) - total); + if (ret < 0) + die_perror("read"); + if (ret == 0) { + eof = true; + break; + } + + total += ret; + } while (total < len); + + if (total != len) + xerror("total %lu, len %lu eof %d\n", total, len, eof); + + if (memcmp(buf, buf2, len)) + xerror("data corruption"); + + if (s.tcpi_rcv_delta) + assert(s.tcpi_rcv_delta <= total); + + do_getsockopts(&s, fd, ret, ret); + + if (eof) + total += 1; /* sequence advances due to FIN */ + + assert(s.mptcpi_rcv_delta == (uint64_t)total); + close(fd); +} + +static void process_one_client(int fd, int pipefd) +{ + ssize_t ret, ret2, ret3; + struct so_state s; + char buf[4096]; + + memset(&s, 0, sizeof(s)); + do_getsockopts(&s, fd, 0, 0); + + ret = write(pipefd, "xmit", 4); + assert(ret == 4); + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) + die_perror("read"); + + assert(s.mptcpi_rcv_delta <= (uint64_t)ret); + + if (s.tcpi_rcv_delta) + assert(s.tcpi_rcv_delta == (uint64_t)ret); + + ret2 = write(fd, buf, ret); + if (ret2 < 0) + die_perror("write"); + + /* wait for hangup */ + ret3 = read(fd, buf, 1); + if (ret3 != 0) + xerror("expected EOF, got %lu", ret3); + + do_getsockopts(&s, fd, ret, ret2); + if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) + xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + close(fd); +} + +static int xaccept(int s) +{ + int fd = accept(s, NULL, 0); + + if (fd < 0) + die_perror("accept"); + + return fd; +} + +static int server(int pipefd) +{ + int fd = -1, r; + + switch (pf) { + case AF_INET: + fd = sock_listen_mptcp("127.0.0.1", "15432"); + break; + case AF_INET6: + fd = sock_listen_mptcp("::1", "15432"); + break; + default: + xerror("Unknown pf %d\n", pf); + break; + } + + r = write(pipefd, "conn", 4); + assert(r == 4); + + alarm(15); + r = xaccept(fd); + + process_one_client(r, pipefd); + + return 0; +} + +static int client(int pipefd) +{ + int fd = -1; + + alarm(15); + + switch (pf) { + case AF_INET: + fd = sock_connect_mptcp("127.0.0.1", "15432", IPPROTO_MPTCP); + break; + case AF_INET6: + fd = sock_connect_mptcp("::1", "15432", IPPROTO_MPTCP); + break; + default: + xerror("Unknown pf %d\n", pf); + } + + connect_one_server(fd, pipefd); + + return 0; +} + +static pid_t xfork(void) +{ + pid_t p = fork(); + + if (p < 0) + die_perror("fork"); + + return p; +} + +static int rcheck(int wstatus, const char *what) +{ + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == 0) + return 0; + fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus)); + return WEXITSTATUS(wstatus); + } else if (WIFSIGNALED(wstatus)) { + xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus)); + } else if (WIFSTOPPED(wstatus)) { + xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus)); + } + + return 111; +} + +int main(int argc, char *argv[]) +{ + int e1, e2, wstatus; + pid_t s, c, ret; + int pipefds[2]; + + parse_opts(argc, argv); + + e1 = pipe(pipefds); + if (e1 < 0) + die_perror("pipe"); + + s = xfork(); + if (s == 0) + return server(pipefds[1]); + + close(pipefds[1]); + + /* wait until server bound a socket */ + e1 = read(pipefds[0], &e1, 4); + assert(e1 == 4); + + c = xfork(); + if (c == 0) + return client(pipefds[0]); + + close(pipefds[0]); + + ret = waitpid(s, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e1 = rcheck(wstatus, "server"); + ret = waitpid(c, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e2 = rcheck(wstatus, "client"); + + return e1 ? e1 : e2; +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 1579e471a5e7..41de643788b8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -239,12 +239,35 @@ make_file() echo "Created $name (size $size KB) containing data sent by $who" } +do_mptcp_sockopt_tests() +{ + local lret=0 + + ./mptcp_sockopt + lret=$? + + if [ $lret -ne 0 ]; then + echo "FAIL: SOL_MPTCP getsockopt" 1>&2 + ret=$lret + return + fi + + ./mptcp_sockopt -6 + lret=$? + + if [ $lret -ne 0 ]; then + echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2 + ret=$lret + return + fi +} + run_tests() { listener_ns="$1" connector_ns="$2" connect_addr="$3" - lret=0 + local lret=0 do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} @@ -268,9 +291,13 @@ trap cleanup EXIT run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 - if [ $ret -eq 0 ];then echo "PASS: all packets had packet mark set" fi +do_mptcp_sockopt_tests +if [ $ret -eq 0 ];then + echo "PASS: SOL_MPTCP getsockopt has expected information" +fi + exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 3c741abe034e..cbacf9f6538b 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -70,7 +70,7 @@ check() check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "defaults limits" +subflows 2" "defaults limits" ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo @@ -118,11 +118,11 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" ip netns exec $ns1 ./pm_nl_ctl limits 9 1 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "rcv addrs above hard limit" +subflows 2" "rcv addrs above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 1 9 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "subflows above hard limit" +subflows 2" "subflows above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 910d8126af8f..f441ff7904fc 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -51,7 +51,7 @@ setup() sout=$(mktemp) cout=$(mktemp) capout=$(mktemp) - size=$((2048 * 4096)) + size=$((2 * 2048 * 4096)) dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 @@ -161,17 +161,15 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \ 0.0.0.0 < "$sin" > "$sout" & local spid=$! wait_local_port_listen "${ns3}" "${port}" - local start - start=$(date +%s%3N) timeout ${timeout_test} \ ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $time \ 10.0.3.3 < "$cin" > "$cout" & local cpid=$! @@ -180,27 +178,20 @@ do_transfer() wait $spid local rets=$? - local stop - stop=$(date +%s%3N) - if $capture; then sleep 1 kill ${cappid_listener} kill ${cappid_connector} fi - local duration - duration=$((stop-start)) - cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - printf "%16s" "$duration max $max_time " + printf "%-16s" " max $max_time " if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ - [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \ - [ $duration -lt $max_time ]; then + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then echo "[ OK ]" cat "$capout" return 0 @@ -244,23 +235,24 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measure in ms - local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) )) + # time is measured in ms, account for transfer size, affegated link speed + # and header overhead (10%) + local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake - # completion - time=$((time + 1350)) + # completion (see mptcp_connect): 200ms on each side, add some slack + time=$((time + 450)) - printf "%-50s" "$msg" - do_transfer $small $large $((time * 11 / 10)) + printf "%-60s" "$msg" + do_transfer $small $large $time lret=$? if [ $lret -ne 0 ]; then ret=$lret [ $bail -eq 0 ] || exit $ret fi - printf "%-50s" "$msg - reverse direction" - do_transfer $large $small $((time * 11 / 10)) + printf "%-60s" "$msg - reverse direction" + do_transfer $large $small $time lret=$? if [ $lret -ne 0 ]; then ret=$lret diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index bd6288302094..b599003eb5ba 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -28,6 +28,7 @@ #include <unistd.h> #include <time.h> #include <errno.h> +#include <getopt.h> #include <linux/xfrm.h> #include <linux/ipsec.h> @@ -101,6 +102,8 @@ struct sock_args { struct sockaddr_in6 v6; } md5_prefix; unsigned int prefix_len; + /* 0: default, -1: force off, +1: force on */ + int bind_key_ifindex; /* expected addresses and device index for connection */ const char *expected_dev; @@ -271,11 +274,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args } memcpy(&md5sig.tcpm_addr, addr, alen); - if (args->ifindex) { + if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) { opt = TCP_MD5SIG_EXT; md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; md5sig.tcpm_ifindex = args->ifindex; + log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex); + } else { + log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex); } rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig)); @@ -1822,6 +1828,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) } #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" +#define OPT_FORCE_BIND_KEY_IFINDEX 1001 +#define OPT_NO_BIND_KEY_IFINDEX 1002 + +static struct option long_opts[] = { + {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX}, + {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX}, + {0, 0, 0, 0} +}; static void print_usage(char *prog) { @@ -1858,6 +1872,10 @@ static void print_usage(char *prog) " -M password use MD5 sum protection\n" " -X password MD5 password for client mode\n" " -m prefix/len prefix and length to use for MD5 key\n" + " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n" + " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n" + " (default: only if -I is passed)\n" + "\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" "\n" @@ -1893,7 +1911,7 @@ int main(int argc, char *argv[]) * process input args */ - while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { + while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) { switch (rc) { case 'B': both_mode = 1; @@ -1966,6 +1984,12 @@ int main(int argc, char *argv[]) case 'M': args.password = optarg; break; + case OPT_FORCE_BIND_KEY_IFINDEX: + args.bind_key_ifindex = 1; + break; + case OPT_NO_BIND_KEY_IFINDEX: + args.bind_key_ifindex = -1; + break; case 'X': args.client_pw = optarg; break; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 97fceb9be9ed..d3047e251fe9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -29,6 +29,8 @@ struct tls_crypto_info_keys { union { struct tls12_crypto_info_aes_gcm_128 aes128; struct tls12_crypto_info_chacha20_poly1305 chacha20; + struct tls12_crypto_info_sm4_gcm sm4gcm; + struct tls12_crypto_info_sm4_ccm sm4ccm; }; size_t len; }; @@ -49,6 +51,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->aes128.info.version = tls_version; tls12->aes128.info.cipher_type = cipher_type; break; + case TLS_CIPHER_SM4_GCM: + tls12->len = sizeof(struct tls12_crypto_info_sm4_gcm); + tls12->sm4gcm.info.version = tls_version; + tls12->sm4gcm.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_SM4_CCM: + tls12->len = sizeof(struct tls12_crypto_info_sm4_ccm); + tls12->sm4ccm.info.version = tls_version; + tls12->sm4ccm.info.cipher_type = cipher_type; + break; default: break; } @@ -148,13 +160,13 @@ FIXTURE_VARIANT(tls) uint16_t cipher_type; }; -FIXTURE_VARIANT_ADD(tls, 12_gcm) +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) { .tls_version = TLS_1_2_VERSION, .cipher_type = TLS_CIPHER_AES_GCM_128, }; -FIXTURE_VARIANT_ADD(tls, 13_gcm) +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm) { .tls_version = TLS_1_3_VERSION, .cipher_type = TLS_CIPHER_AES_GCM_128, @@ -172,6 +184,18 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, }; +FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_SM4_GCM, +}; + +FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_SM4_CCM, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 427d94816f2d..d4ffebb989f8 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -199,7 +199,6 @@ fi # test basic connectivity if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 - bash exit 1 fi diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d7e07f4c3d7f..da1c1e4b6c86 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -741,6 +741,149 @@ EOF return $lret } +# test port shadowing. +# create two listening services, one on router (ns0), one +# on client (ns2), which is masqueraded from ns1 point of view. +# ns2 sends udp packet coming from service port to ns1, on a highport. +# Later, if n1 uses same highport to connect to ns0:service, packet +# might be port-forwarded to ns2 instead. + +# second argument tells if we expect the 'fake-entry' to take effect +# (CLIENT) or not (ROUTER). +test_port_shadow() +{ + local test=$1 + local expect=$2 + local daddrc="10.0.1.99" + local daddrs="10.0.1.1" + local result="" + local logmsg="" + + echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & + nc_r=$! + + echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & + nc_c=$! + + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null + + # ns1 tries to connect to ns0:1405. With default settings this should connect + # to client, it matches the conntrack entry created above. + + result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405) + + if [ "$result" = "$expect" ] ;then + echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" + else + echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended" + ret=1 + fi + + kill $nc_r $nc_c 2>/dev/null + + # flush udp entries for next test round, if any + ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 +} + +# This prevents port shadow of router service via packet filter, +# packets claiming to originate from service port from internal +# network are dropped. +test_port_shadow_filter() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family filter { + chain forward { + type filter hook forward priority 0; policy accept; + meta iif veth1 udp sport 1405 drop + } +} +EOF + test_port_shadow "port-filter" "ROUTER" + + ip netns exec "$ns0" nft delete table $family filter +} + +# This prevents port shadow of router service via notrack. +test_port_shadow_notrack() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family raw { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 udp dport 1405 notrack + udp dport 1405 notrack + } + chain output { + type filter hook output priority -300; policy accept; + udp sport 1405 notrack + } +} +EOF + test_port_shadow "port-notrack" "ROUTER" + + ip netns exec "$ns0" nft delete table $family raw +} + +# This prevents port shadow of router service via sport remap. +test_port_shadow_pat() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family pat { + chain postrouting { + type nat hook postrouting priority -1; policy accept; + meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random + } +} +EOF + test_port_shadow "pat" "ROUTER" + + ip netns exec "$ns0" nft delete table $family pat +} + +test_port_shadowing() +{ + local family="ip" + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" + + # test packet filter based mitigation: prevent forwarding of + # packets claiming to come from the service port. + test_port_shadow_filter "$family" + + # test conntrack based mitigation: connections going or coming + # from router:service bypass connection tracking. + test_port_shadow_notrack "$family" + + # test nat based mitigation: fowarded packets coming from service port + # are masqueraded with random highport. + test_port_shadow_pat "$family" + + ip netns exec "$ns0" nft delete table $family nat +} # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do @@ -861,6 +1004,8 @@ reset_counters $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet +test_port_shadowing + if [ $ret -ne 0 ];then echo -n "FAIL: " nft --version diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index 00197acb7ff1..b0b20e0b4e30 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -13,7 +13,7 @@ function do_one orig=$(cat "$mitigation") - start=$EPOCHSECONDS + start=$(date +%s) now=$start while [[ $((now-start)) -lt "$TIMEOUT" ]] @@ -21,7 +21,7 @@ function do_one echo 0 > "$mitigation" echo 1 > "$mitigation" - now=$EPOCHSECONDS + now=$(date +%s) done echo "$orig" > "$mitigation" diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 8f3e72e626fa..c4e6a34f9657 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -11,6 +11,7 @@ /proc-self-syscall /proc-self-wchan /proc-subset-pid +/proc-tid0 /proc-uptime-001 /proc-uptime-002 /read diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1054e40a499a..219fc6113847 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += -D_GNU_SOURCE +LDFLAGS += -pthread TEST_GEN_PROGS := TEST_GEN_PROGS += fd-001-lookup @@ -13,6 +14,7 @@ TEST_GEN_PROGS += proc-self-map-files-002 TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan TEST_GEN_PROGS += proc-subset-pid +TEST_GEN_PROGS += proc-tid0 TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read diff --git a/tools/testing/selftests/proc/proc-tid0.c b/tools/testing/selftests/proc/proc-tid0.c new file mode 100644 index 000000000000..58c1d7c90a8e --- /dev/null +++ b/tools/testing/selftests/proc/proc-tid0.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that /proc/*/task never contains "0". +#include <sys/types.h> +#include <dirent.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> + +static pid_t pid = -1; + +static void atexit_hook(void) +{ + if (pid > 0) { + kill(pid, SIGKILL); + } +} + +static void *f(void *_) +{ + return NULL; +} + +static void sigalrm(int _) +{ + exit(0); +} + +int main(void) +{ + pid = fork(); + if (pid == 0) { + /* child */ + while (1) { + pthread_t pth; + pthread_create(&pth, NULL, f, NULL); + pthread_join(pth, NULL); + } + } else if (pid > 0) { + /* parent */ + atexit(atexit_hook); + + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/task", pid); + + signal(SIGALRM, sigalrm); + alarm(1); + + while (1) { + DIR *d = opendir(buf); + struct dirent *de; + while ((de = readdir(d))) { + if (strcmp(de->d_name, "0") == 0) { + exit(1); + } + } + closedir(d); + } + + return 0; + } else { + perror("fork"); + return 1; + } +} diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 03126eb6ec5a..c7d42ef80c53 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -149,6 +149,7 @@ do done # Download and expand the tarball on all systems. +echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log" for i in $systems do echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index f442d84fb2a3..6cf9ec6a3d1c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -184,7 +184,7 @@ do TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG ;; --kcsan) - TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_INTERRUPT_WATCHER=y CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG + TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; --kmake-arg|--kmake-args) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 363f56081eff..eae88aacca2a 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -351,7 +351,7 @@ fi if test "$do_scftorture" = "yes" then torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot" - torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make fi if test "$do_refscale" = yes @@ -434,7 +434,12 @@ then batchno=1 if test -s $T/xz-todo then - echo Size before compressing: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log + for i in `cat $T/xz-todo` + do + find $i -name 'vmlinux*' -print + done | wc -l | awk '{ print $1 }' > $T/xz-todo-count + n2compress="`cat $T/xz-todo-count`" + echo Size before compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log for i in `cat $T/xz-todo` do echo Compressing vmlinux files in ${i}: `date` >> "$tdir/log-xz" 2>&1 @@ -456,7 +461,7 @@ then echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log fi wait - echo Size after compressing: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log + echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log else echo No compression needed: `date` >> "$tdir/log-xz" 2>&1 diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c index 7db9cf822dc7..8109b17dc764 100644 --- a/tools/testing/selftests/sched/cs_prctl_test.c +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -62,6 +62,17 @@ enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID}; const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES; +struct child_args { + int num_threads; + int pfd[2]; + int cpid; + int thr_tids[MAX_THREADS]; +}; + +static struct child_args procs[MAX_PROCESSES]; +static int num_processes = 2; +static int need_cleanup = 0; + static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { @@ -78,8 +89,14 @@ static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned l #define handle_error(msg) __handle_error(__FILE__, __LINE__, msg) static void __handle_error(char *fn, int ln, char *msg) { + int pidx; printf("(%s:%d) - ", fn, ln); perror(msg); + if (need_cleanup) { + for (pidx = 0; pidx < num_processes; ++pidx) + kill(procs[pidx].cpid, 15); + need_cleanup = 0; + } exit(EXIT_FAILURE); } @@ -106,13 +123,6 @@ static unsigned long get_cs_cookie(int pid) return cookie; } -struct child_args { - int num_threads; - int pfd[2]; - int cpid; - int thr_tids[MAX_THREADS]; -}; - static int child_func_thread(void __attribute__((unused))*arg) { while (1) @@ -212,10 +222,7 @@ void _validate(int line, int val, char *msg) int main(int argc, char *argv[]) { - struct child_args procs[MAX_PROCESSES]; - int keypress = 0; - int num_processes = 2; int num_threads = 3; int delay = 0; int res = 0; @@ -262,6 +269,7 @@ int main(int argc, char *argv[]) printf("\n## Create a thread/process/process group hiearchy\n"); create_processes(num_processes, num_threads, procs); + need_cleanup = 1; disp_processes(num_processes, procs); validate(get_cs_cookie(0) == 0); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1d64891e6492..d425688cf59c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -276,12 +276,12 @@ int seccomp(unsigned int op, unsigned int flags, void *args) } #endif -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) #else -#error "wut? Unknown __BYTE_ORDER?!" +#error "wut? Unknown __BYTE_ORDER__?!" #endif #define SIBLING_EXIT_UNKILLED 0xbadbeef diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index b02eac613fdd..2e7e86e85282 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only hugepage-mmap +hugepage-mremap hugepage-shm khugepaged map_hugetlb diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d9605bd10f2d..1607322a112c 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -29,6 +29,7 @@ TEST_GEN_FILES = compaction_test TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugepage-mmap +TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += khugepaged TEST_GEN_FILES += madv_populate diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c new file mode 100644 index 000000000000..257df94697a5 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * hugepage-mremap: + * + * Example of remapping huge page memory in a user application using the + * mremap system call. Code assumes a hugetlbfs filesystem is mounted + * at './huge'. The code will use 10MB worth of huge pages. + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <errno.h> +#include <fcntl.h> /* Definition of O_* constants */ +#include <sys/syscall.h> /* Definition of SYS_* constants */ +#include <linux/userfaultfd.h> +#include <sys/ioctl.h> + +#define LENGTH (1UL * 1024 * 1024 * 1024) + +#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC) +#define FLAGS (MAP_SHARED | MAP_ANONYMOUS) + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +static void register_region_with_uffd(char *addr, size_t len) +{ + long uffd; /* userfaultfd file descriptor */ + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + + /* Create and enable userfaultfd object. */ + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd == -1) { + perror("userfaultfd"); + exit(1); + } + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { + perror("ioctl-UFFDIO_API"); + exit(1); + } + + /* Create a private anonymous mapping. The memory will be + * demand-zero paged--that is, not yet allocated. When we + * actually touch the memory, it will be allocated via + * the userfaultfd. + */ + + addr = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + printf("Address returned by mmap() = %p\n", addr); + + /* Register the memory range of the mapping we just created for + * handling by the userfaultfd object. In mode, we request to track + * missing pages (i.e., pages that have not yet been faulted in). + */ + + uffdio_register.range.start = (unsigned long)addr; + uffdio_register.range.len = len; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { + perror("ioctl-UFFDIO_REGISTER"); + exit(1); + } +} + +int main(void) +{ + int ret = 0; + + int fd = open("/huge/test", O_CREAT | O_RDWR, 0755); + + if (fd < 0) { + perror("Open failed"); + exit(1); + } + + /* mmap to a PUD aligned address to hopefully trigger pmd sharing. */ + unsigned long suggested_addr = 0x7eaa40000000; + void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); + printf("Map haddr: Returned address is %p\n", haddr); + if (haddr == MAP_FAILED) { + perror("mmap1"); + exit(1); + } + + /* mmap again to a dummy address to hopefully trigger pmd sharing. */ + suggested_addr = 0x7daa40000000; + void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); + printf("Map daddr: Returned address is %p\n", daddr); + if (daddr == MAP_FAILED) { + perror("mmap3"); + exit(1); + } + + suggested_addr = 0x7faa40000000; + void *vaddr = + mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0); + printf("Map vaddr: Returned address is %p\n", vaddr); + if (vaddr == MAP_FAILED) { + perror("mmap2"); + exit(1); + } + + register_region_with_uffd(haddr, LENGTH); + + void *addr = mremap(haddr, LENGTH, LENGTH, + MREMAP_MAYMOVE | MREMAP_FIXED, vaddr); + if (addr == MAP_FAILED) { + perror("mremap"); + exit(1); + } + + printf("Mremap: Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + + return ret; +} diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index b61dcdb44c5b..1436e1a9a3d3 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -5,6 +5,10 @@ #include <time.h> #include <string.h> #include <numa.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdint.h> +#include <err.h> #include "../kselftest.h" #include "../../../../include/vdso/time64.h" @@ -18,6 +22,15 @@ #define KSM_MERGE_ACROSS_NODES_DEFAULT true #define MB (1ul << 20) +#define PAGE_SHIFT 12 +#define HPAGE_SHIFT 21 + +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define HPAGE_SIZE (1 << HPAGE_SHIFT) + +#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0) +#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1)) + struct ksm_sysfs { unsigned long max_page_sharing; unsigned long merge_across_nodes; @@ -34,6 +47,7 @@ enum ksm_test_name { CHECK_KSM_ZERO_PAGE_MERGE, CHECK_KSM_NUMA_MERGE, KSM_MERGE_TIME, + KSM_MERGE_TIME_HUGE_PAGES, KSM_COW_TIME }; @@ -100,6 +114,9 @@ static void print_help(void) " -P evaluate merging time and speed.\n" " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" + " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " For this test, the size of duplicated memory area (in MiB)\n" + " must be provided using -s option\n" " -C evaluate the time required to break COW of merged pages.\n\n"); printf(" -a: specify the access protections of pages.\n" @@ -354,12 +371,34 @@ err_out: return KSFT_FAIL; } +static int get_next_mem_node(int node) +{ + + long node_size; + int mem_node = 0; + int i, max_node = numa_max_node(); + + for (i = node + 1; i <= max_node + node; i++) { + mem_node = i % (max_node + 1); + node_size = numa_node_size(mem_node, NULL); + if (node_size > 0) + break; + } + return mem_node; +} + +static int get_first_mem_node(void) +{ + return get_next_mem_node(numa_max_node()); +} + static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes, size_t page_size) { void *numa1_map_ptr, *numa2_map_ptr; struct timespec start_time; int page_count = 2; + int first_node; if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { perror("clock_gettime"); @@ -370,7 +409,7 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a perror("NUMA support not enabled"); return KSFT_SKIP; } - if (numa_max_node() < 1) { + if (numa_num_configured_nodes() <= 1) { printf("At least 2 NUMA nodes must be available\n"); return KSFT_SKIP; } @@ -378,8 +417,9 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a return KSFT_FAIL; /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */ - numa1_map_ptr = numa_alloc_onnode(page_size, 0); - numa2_map_ptr = numa_alloc_onnode(page_size, 1); + first_node = get_first_mem_node(); + numa1_map_ptr = numa_alloc_onnode(page_size, first_node); + numa2_map_ptr = numa_alloc_onnode(page_size, get_next_mem_node(first_node)); if (!numa1_map_ptr || !numa2_map_ptr) { perror("numa_alloc_onnode"); return KSFT_FAIL; @@ -416,6 +456,101 @@ err_out: return KSFT_FAIL; } +int64_t allocate_transhuge(void *ptr, int pagemap_fd) +{ + uint64_t ent[2]; + + /* drop pmd */ + if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANONYMOUS | + MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) + errx(2, "mmap transhuge"); + + if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) + err(2, "MADV_HUGEPAGE"); + + /* allocate transparent huge page */ + *(volatile void **)ptr = ptr; + + if (pread(pagemap_fd, ent, sizeof(ent), + (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent)) + err(2, "read pagemap"); + + if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && + PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && + !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1))) + return PAGEMAP_PFN(ent[0]); + + return -1; +} + +static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t map_size) +{ + void *map_ptr, *map_ptr_orig; + struct timespec start_time, end_time; + unsigned long scan_time_ns; + int pagemap_fd, n_normal_pages, n_huge_pages; + + map_size *= MB; + size_t len = map_size; + + len -= len % HPAGE_SIZE; + map_ptr_orig = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0); + map_ptr = map_ptr_orig + HPAGE_SIZE - (uintptr_t)map_ptr_orig % HPAGE_SIZE; + + if (map_ptr_orig == MAP_FAILED) + err(2, "initial mmap"); + + if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + err(2, "MADV_HUGEPAGE"); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + err(2, "open pagemap"); + + n_normal_pages = 0; + n_huge_pages = 0; + for (void *p = map_ptr; p < map_ptr + len; p += HPAGE_SIZE) { + if (allocate_transhuge(p, pagemap_fd) < 0) + n_normal_pages++; + else + n_huge_pages++; + } + printf("Number of normal pages: %d\n", n_normal_pages); + printf("Number of huge pages: %d\n", n_huge_pages); + + memset(map_ptr, '*', len); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n", map_size / MB); + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC, + scan_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", (map_size / MB) / + ((double)scan_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr_orig, len + HPAGE_SIZE); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr_orig, len + HPAGE_SIZE); + return KSFT_FAIL; +} + static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size) { void *map_ptr; @@ -541,7 +676,7 @@ int main(int argc, char *argv[]) bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -595,6 +730,9 @@ int main(int argc, char *argv[]) case 'P': test_name = KSM_MERGE_TIME; break; + case 'H': + test_name = KSM_MERGE_TIME_HUGE_PAGES; + break; case 'C': test_name = KSM_COW_TIME; break; @@ -647,6 +785,14 @@ int main(int argc, char *argv[]) ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, size_MB); break; + case KSM_MERGE_TIME_HUGE_PAGES: + if (size_MB == 0) { + printf("Option '-s' is required.\n"); + return KSFT_FAIL; + } + ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, + ksm_scan_limit_sec, size_MB); + break; case KSM_COW_TIME: ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index b959e4ebdad4..3ee0e8275600 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -14,12 +14,11 @@ #include <unistd.h> #include <errno.h> #include <fcntl.h> +#include <linux/mman.h> #include <sys/mman.h> #include "../kselftest.h" -#if defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) - /* * For now, we're using 2 MiB of private anonymous memory for all tests. */ @@ -328,15 +327,3 @@ int main(int argc, char **argv) err, ksft_test_num()); return ksft_exit_pass(); } - -#else /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ - -#warning "missing MADV_POPULATE_READ or MADV_POPULATE_WRITE definition" - -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_exit_skip("MADV_POPULATE_READ or MADV_POPULATE_WRITE not defined\n"); -} - -#endif /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 45e803af7c77..a24d30af3094 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -108,6 +108,17 @@ else echo "[PASS]" fi +echo "-----------------------" +echo "running hugepage-mremap" +echo "-----------------------" +./hugepage-mremap +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing." diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 1af16d2c2a0a..52497b7b9f1d 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -341,7 +341,7 @@ void split_file_backed_thp(void) } /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); close(fd); if (num_written < 1) { diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c index fd7f1b4a96f9..5e4c036f6ad3 100644 --- a/tools/testing/selftests/vm/transhuge-stress.c +++ b/tools/testing/selftests/vm/transhuge-stress.c @@ -79,7 +79,7 @@ int main(int argc, char **argv) warnx("allocate %zd transhuge pages, using %zd MiB virtual memory" " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20, - len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1)); + ram >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1)); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 10ab56c2484a..8a09057d2f22 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -57,6 +57,7 @@ #include <assert.h> #include <inttypes.h> #include <stdint.h> +#include <sys/random.h> #include "../kselftest.h" @@ -307,37 +308,24 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) } struct uffd_test_ops { - unsigned long expected_ioctls; void (*allocate_area)(void **alloc_area); void (*release_pages)(char *rel_area); void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); }; -#define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE)) - -#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE) | \ - (1 << _UFFDIO_WRITEPROTECT)) - static struct uffd_test_ops anon_uffd_test_ops = { - .expected_ioctls = ANON_EXPECTED_IOCTLS, .allocate_area = anon_allocate_area, .release_pages = anon_release_pages, .alias_mapping = noop_alias_mapping, }; static struct uffd_test_ops shmem_uffd_test_ops = { - .expected_ioctls = SHMEM_EXPECTED_IOCTLS, .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, .alias_mapping = shmem_alias_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { - .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC & ~(1 << _UFFDIO_CONTINUE), .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, @@ -345,6 +333,43 @@ static struct uffd_test_ops hugetlb_uffd_test_ops = { static struct uffd_test_ops *uffd_test_ops; +static inline uint64_t uffd_minor_feature(void) +{ + if (test_type == TEST_HUGETLB && map_shared) + return UFFD_FEATURE_MINOR_HUGETLBFS; + else if (test_type == TEST_SHMEM) + return UFFD_FEATURE_MINOR_SHMEM; + else + return 0; +} + +static uint64_t get_expected_ioctls(uint64_t mode) +{ + uint64_t ioctls = UFFD_API_RANGE_IOCTLS; + + if (test_type == TEST_HUGETLB) + ioctls &= ~(1 << _UFFDIO_ZEROPAGE); + + if (!((mode & UFFDIO_REGISTER_MODE_WP) && test_uffdio_wp)) + ioctls &= ~(1 << _UFFDIO_WRITEPROTECT); + + if (!((mode & UFFDIO_REGISTER_MODE_MINOR) && test_uffdio_minor)) + ioctls &= ~(1 << _UFFDIO_CONTINUE); + + return ioctls; +} + +static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls) +{ + uint64_t expected = get_expected_ioctls(mode); + uint64_t actual = ioctls & expected; + + if (actual != expected) { + err("missing ioctl(s): expected %"PRIx64" actual: %"PRIx64, + expected, actual); + } +} + static void userfaultfd_open(uint64_t *features) { struct uffdio_api uffdio_api; @@ -405,7 +430,7 @@ static void uffd_test_ctx_clear(void) munmap_area((void **)&area_dst_alias); } -static void uffd_test_ctx_init_ext(uint64_t *features) +static void uffd_test_ctx_init(uint64_t features) { unsigned long nr, cpu; @@ -414,10 +439,7 @@ static void uffd_test_ctx_init_ext(uint64_t *features) uffd_test_ops->allocate_area((void **)&area_src); uffd_test_ops->allocate_area((void **)&area_dst); - uffd_test_ops->release_pages(area_src); - uffd_test_ops->release_pages(area_dst); - - userfaultfd_open(features); + userfaultfd_open(&features); count_verify = malloc(nr_pages * sizeof(unsigned long long)); if (!count_verify) @@ -437,6 +459,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features) *(area_count(area_src, nr) + 1) = 1; } + /* + * After initialization of area_src, we must explicitly release pages + * for area_dst to make sure it's fully empty. Otherwise we could have + * some area_dst pages be errornously initialized with zero pages, + * hence we could hit memory corruption later in the test. + * + * One example is when THP is globally enabled, above allocate_area() + * calls could have the two areas merged into a single VMA (as they + * will have the same VMA flags so they're mergeable). When we + * initialize the area_src above, it's possible that some part of + * area_dst could have been faulted in via one huge THP that will be + * shared between area_src and area_dst. It could cause some of the + * area_dst won't be trapped by missing userfaults. + * + * This release_pages() will guarantee even if that happened, we'll + * proactively split the thp and drop any accidentally initialized + * pages within area_dst. + */ + uffd_test_ops->release_pages(area_dst); + pipefd = malloc(sizeof(int) * nr_cpus * 2); if (!pipefd) err("pipefd"); @@ -445,11 +487,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features) err("pipe"); } -static inline void uffd_test_ctx_init(uint64_t features) -{ - uffd_test_ctx_init_ext(&features); -} - static int my_bcmp(char *str1, char *str2, size_t n) { unsigned long i; @@ -501,22 +538,10 @@ static void continue_range(int ufd, __u64 start, __u64 len) static void *locking_thread(void *arg) { unsigned long cpu = (unsigned long) arg; - struct random_data rand; unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ - int32_t rand_nr; unsigned long long count; - char randstate[64]; - unsigned int seed; - if (bounces & BOUNCE_RANDOM) { - seed = (unsigned int) time(NULL) - bounces; - if (!(bounces & BOUNCE_RACINGFAULTS)) - seed += cpu; - bzero(&rand, sizeof(rand)); - bzero(&randstate, sizeof(randstate)); - if (initstate_r(seed, randstate, sizeof(randstate), &rand)) - err("initstate_r failed"); - } else { + if (!(bounces & BOUNCE_RANDOM)) { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) page_nr += cpu * nr_pages_per_cpu; @@ -524,15 +549,8 @@ static void *locking_thread(void *arg) while (!finished) { if (bounces & BOUNCE_RANDOM) { - if (random_r(&rand, &rand_nr)) - err("random_r failed"); - page_nr = rand_nr; - if (sizeof(page_nr) > sizeof(rand_nr)) { - if (random_r(&rand, &rand_nr)) - err("random_r failed"); - page_nr |= (((unsigned long) rand_nr) << 16) << - 16; - } + if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) + err("getrandom failed"); } else page_nr += 1; page_nr %= nr_pages; @@ -1013,11 +1031,9 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) { struct uffdio_zeropage uffdio_zeropage; int ret; - unsigned long has_zeropage; + bool has_zeropage = get_expected_ioctls(0) & (1 << _UFFDIO_ZEROPAGE); __s64 res; - has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); - if (offset >= nr_pages * page_size) err("unexpected offset %lu", offset); uffdio_zeropage.range.start = (unsigned long) area_dst + offset; @@ -1057,7 +1073,6 @@ static int uffdio_zeropage(int ufd, unsigned long offset) static int userfaultfd_zeropage_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; printf("testing UFFDIO_ZEROPAGE: "); fflush(stdout); @@ -1072,9 +1087,8 @@ static int userfaultfd_zeropage_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (uffdio_zeropage(uffd, 0)) if (my_bcmp(area_dst, zeropage, page_size)) @@ -1087,7 +1101,6 @@ static int userfaultfd_zeropage_test(void) static int userfaultfd_events_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; pthread_t uffd_mon; int err, features; pid_t pid; @@ -1111,9 +1124,8 @@ static int userfaultfd_events_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) err("uffd_poll_thread create"); @@ -1141,7 +1153,6 @@ static int userfaultfd_events_test(void) static int userfaultfd_sig_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; unsigned long userfaults; pthread_t uffd_mon; int err, features; @@ -1165,9 +1176,8 @@ static int userfaultfd_sig_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (faulting_process(1)) err("faulting process failed"); @@ -1202,14 +1212,12 @@ static int userfaultfd_sig_test(void) static int userfaultfd_minor_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; unsigned long p; pthread_t uffd_mon; uint8_t expected_byte; void *expected_page; char c; struct uffd_stats stats = { 0 }; - uint64_t req_features, features_out; if (!test_uffdio_minor) return 0; @@ -1217,21 +1225,7 @@ static int userfaultfd_minor_test(void) printf("testing minor faults: "); fflush(stdout); - if (test_type == TEST_HUGETLB) - req_features = UFFD_FEATURE_MINOR_HUGETLBFS; - else if (test_type == TEST_SHMEM) - req_features = UFFD_FEATURE_MINOR_SHMEM; - else - return 1; - - features_out = req_features; - uffd_test_ctx_init_ext(&features_out); - /* If kernel reports required features aren't supported, skip test. */ - if ((features_out & req_features) != req_features) { - printf("skipping test due to lack of feature support\n"); - fflush(stdout); - return 0; - } + uffd_test_ctx_init(uffd_minor_feature()); uffdio_register.range.start = (unsigned long)area_dst_alias; uffdio_register.range.len = nr_pages * page_size; @@ -1239,10 +1233,8 @@ static int userfaultfd_minor_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - expected_ioctls |= 1 << _UFFDIO_CONTINUE; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl(s)"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); /* * After registering with UFFD, populate the non-UFFD-registered side of @@ -1439,8 +1431,6 @@ static int userfaultfd_stress(void) pthread_attr_setstacksize(&attr, 16*1024*1024); while (bounces--) { - unsigned long expected_ioctls; - printf("bounces: %d, mode:", bounces); if (bounces & BOUNCE_RANDOM) printf(" rnd"); @@ -1468,10 +1458,8 @@ static int userfaultfd_stress(void) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (area_dst_alias) { uffdio_register.range.start = (unsigned long) @@ -1592,6 +1580,8 @@ unsigned long default_huge_page_size(void) static void set_test_type(const char *type) { + uint64_t features = UFFD_API_FEATURES; + if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; @@ -1625,6 +1615,22 @@ static void set_test_type(const char *type) if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 > page_size) err("Impossible to run this test"); + + /* + * Whether we can test certain features depends not just on test type, + * but also on whether or not this particular kernel supports the + * feature. + */ + + userfaultfd_open(&features); + + test_uffdio_wp = test_uffdio_wp && + (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); + test_uffdio_minor = test_uffdio_minor && + (features & uffd_minor_feature()); + + close(uffd); + uffd = -1; } static void sigalrm(int sig) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index b4142cd1c5c2..8a1f62ab3c8e 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header + corrupt_xstate_header amx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c new file mode 100644 index 000000000000..3615ef4a48bb --- /dev/null +++ b/tools/testing/selftests/x86/amx.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <err.h> +#include <errno.h> +#include <pthread.h> +#include <setjmp.h> +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <x86intrin.h> + +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/shm.h> +#include <sys/syscall.h> +#include <sys/wait.h> + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +#define XSAVE_HDR_OFFSET 512 +#define XSAVE_HDR_SIZE 64 + +struct xsave_buffer { + union { + struct { + char legacy[XSAVE_HDR_OFFSET]; + char header[XSAVE_HDR_SIZE]; + char extended[0]; + }; + char bytes[0]; + }; +}; + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + asm volatile("cpuid;" + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xsave (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xrstor (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)); +} + +/* err() exits and will not return */ +#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__) + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + fatal_error("sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + fatal_error("sigaction"); +} + +#define XFEATURE_XTILECFG 17 +#define XFEATURE_XTILEDATA 18 +#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) +#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) + +#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) +#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) +static inline void check_cpuid_xsave(void) +{ + uint32_t eax, ebx, ecx, edx; + + /* + * CPUID.1:ECX.XSAVE[bit 26] enumerates general + * support for the XSAVE feature set, including + * XGETBV. + */ + eax = 1; + ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) + fatal_error("cpuid: no CPU xsave support"); + if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) + fatal_error("cpuid: no OS xsave support"); +} + +static uint32_t xbuf_size; + +static struct { + uint32_t xbuf_offset; + uint32_t size; +} xtiledata; + +#define CPUID_LEAF_XSTATE 0xd +#define CPUID_SUBLEAF_XSTATE_USER 0x0 +#define TILE_CPUID 0x1d +#define TILE_PALETTE_ID 0x1 + +static void check_cpuid_xtiledata(void) +{ + uint32_t eax, ebx, ecx, edx; + + eax = CPUID_LEAF_XSTATE; + ecx = CPUID_SUBLEAF_XSTATE_USER; + cpuid(&eax, &ebx, &ecx, &edx); + + /* + * EBX enumerates the size (in bytes) required by the XSAVE + * instruction for an XSAVE area containing all the user state + * components corresponding to bits currently set in XCR0. + * + * Stash that off so it can be used to allocate buffers later. + */ + xbuf_size = ebx; + + eax = CPUID_LEAF_XSTATE; + ecx = XFEATURE_XTILEDATA; + + cpuid(&eax, &ebx, &ecx, &edx); + /* + * eax: XTILEDATA state component size + * ebx: XTILEDATA state component offset in user buffer + */ + if (!eax || !ebx) + fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", + eax, ebx); + + xtiledata.size = eax; + xtiledata.xbuf_offset = ebx; +} + +/* The helpers for managing XSAVE buffer and tile states: */ + +struct xsave_buffer *alloc_xbuf(void) +{ + struct xsave_buffer *xbuf; + + /* XSAVE buffer should be 64B-aligned. */ + xbuf = aligned_alloc(64, xbuf_size); + if (!xbuf) + fatal_error("aligned_alloc()"); + return xbuf; +} + +static inline void clear_xstate_header(struct xsave_buffer *buffer) +{ + memset(&buffer->header, 0, sizeof(buffer->header)); +} + +static inline uint64_t get_xstatebv(struct xsave_buffer *buffer) +{ + /* XSTATE_BV is at the beginning of the header: */ + return *(uint64_t *)&buffer->header; +} + +static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) +{ + /* XSTATE_BV is at the beginning of the header: */ + *(uint64_t *)(&buffer->header) = bv; +} + +static void set_rand_tiledata(struct xsave_buffer *xbuf) +{ + int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset]; + int data; + int i; + + /* + * Ensure that 'data' is never 0. This ensures that + * the registers are never in their initial configuration + * and thus never tracked as being in the init state. + */ + data = rand() | 1; + + for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++) + *ptr = data; +} + +struct xsave_buffer *stashed_xsave; + +static void init_stashed_xsave(void) +{ + stashed_xsave = alloc_xbuf(); + if (!stashed_xsave) + fatal_error("failed to allocate stashed_xsave\n"); + clear_xstate_header(stashed_xsave); +} + +static void free_stashed_xsave(void) +{ + free(stashed_xsave); +} + +/* See 'struct _fpx_sw_bytes' at sigcontext.h */ +#define SW_BYTES_OFFSET 464 +/* N.B. The struct's field name varies so read from the offset. */ +#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8) + +static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer) +{ + return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET); +} + +static inline uint64_t get_fpx_sw_bytes_features(void *buffer) +{ + return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET); +} + +/* Work around printf() being unsafe in signals: */ +#define SIGNAL_BUF_LEN 1000 +char signal_message_buffer[SIGNAL_BUF_LEN]; +void sig_print(char *msg) +{ + int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1; + + strncat(signal_message_buffer, msg, left); +} + +static volatile bool noperm_signaled; +static int noperm_errs; +/* + * Signal handler for when AMX is used but + * permission has not been obtained. + */ +static void handle_noperm(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + void *xbuf = ctx->uc_mcontext.fpregs; + struct _fpx_sw_bytes *sw_bytes; + uint64_t features; + + /* Reset the signal message buffer: */ + signal_message_buffer[0] = '\0'; + sig_print("\tAt SIGILL handler,\n"); + + if (si->si_code != ILL_ILLOPC) { + noperm_errs++; + sig_print("[FAIL]\tInvalid signal code.\n"); + } else { + sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n"); + } + + sw_bytes = get_fpx_sw_bytes(xbuf); + /* + * Without permission, the signal XSAVE buffer should not + * have room for AMX register state (aka. xtiledata). + * Check that the size does not overlap with where xtiledata + * will reside. + * + * This also implies that no state components *PAST* + * XTILEDATA (features >=19) can be present in the buffer. + */ + if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) { + sig_print("[OK]\tValid xstate size\n"); + } else { + noperm_errs++; + sig_print("[FAIL]\tInvalid xstate size\n"); + } + + features = get_fpx_sw_bytes_features(xbuf); + /* + * Without permission, the XTILEDATA feature + * bit should not be set. + */ + if ((features & XFEATURE_MASK_XTILEDATA) == 0) { + sig_print("[OK]\tValid xstate mask\n"); + } else { + noperm_errs++; + sig_print("[FAIL]\tInvalid xstate mask\n"); + } + + noperm_signaled = true; + ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */ +} + +/* Return true if XRSTOR is successful; otherwise, false. */ +static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask) +{ + noperm_signaled = false; + xrstor(xbuf, mask); + + /* Print any messages produced by the signal code: */ + printf("%s", signal_message_buffer); + /* + * Reset the buffer to make sure any future printing + * only outputs new messages: + */ + signal_message_buffer[0] = '\0'; + + if (noperm_errs) + fatal_error("saw %d errors in noperm signal handler\n", noperm_errs); + + return !noperm_signaled; +} + +/* + * Use XRSTOR to populate the XTILEDATA registers with + * random data. + * + * Return true if successful; otherwise, false. + */ +static inline bool load_rand_tiledata(struct xsave_buffer *xbuf) +{ + clear_xstate_header(xbuf); + set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA); + set_rand_tiledata(xbuf); + return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA); +} + +/* Return XTILEDATA to its initial configuration. */ +static inline void init_xtiledata(void) +{ + clear_xstate_header(stashed_xsave); + xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA); +} + +enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED }; + +/* arch_prctl() and sigaltstack() test */ + +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 + +static void req_xtiledata_perm(void) +{ + syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); +} + +static void validate_req_xcomp_perm(enum expected_result exp) +{ + unsigned long bitmask; + long rc; + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); + if (exp == FAIL_EXPECTED) { + if (rc) { + printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n"); + return; + } + + fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n"); + } else if (rc) { + fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n"); + } + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); + if (rc) { + fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc); + } else if (bitmask & XFEATURE_MASK_XTILE) { + printf("\tARCH_REQ_XCOMP_PERM is successful.\n"); + } +} + +static void validate_xcomp_perm(enum expected_result exp) +{ + bool load_success = load_rand_tiledata(stashed_xsave); + + if (exp == FAIL_EXPECTED) { + if (load_success) { + noperm_errs++; + printf("[FAIL]\tLoad tiledata succeeded.\n"); + } else { + printf("[OK]\tLoad tiledata failed.\n"); + } + } else if (exp == SUCCESS_EXPECTED) { + if (load_success) { + printf("[OK]\tLoad tiledata succeeded.\n"); + } else { + noperm_errs++; + printf("[FAIL]\tLoad tiledata failed.\n"); + } + } +} + +#ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +#endif + +static void *alloc_altstack(unsigned int size) +{ + void *altstack; + + altstack = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + + if (altstack == MAP_FAILED) + fatal_error("mmap() for altstack"); + + return altstack; +} + +static void setup_altstack(void *addr, unsigned long size, enum expected_result exp) +{ + stack_t ss; + int rc; + + memset(&ss, 0, sizeof(ss)); + ss.ss_size = size; + ss.ss_sp = addr; + + rc = sigaltstack(&ss, NULL); + + if (exp == FAIL_EXPECTED) { + if (rc) { + printf("[OK]\tsigaltstack() failed.\n"); + } else { + fatal_error("sigaltstack() succeeded unexpectedly.\n"); + } + } else if (rc) { + fatal_error("sigaltstack()"); + } +} + +static void test_dynamic_sigaltstack(void) +{ + unsigned int small_size, enough_size; + unsigned long minsigstksz; + void *altstack; + + minsigstksz = getauxval(AT_MINSIGSTKSZ); + printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz); + /* + * getauxval() itself can return 0 for failure or + * success. But, in this case, AT_MINSIGSTKSZ + * will always return a >=0 value if implemented. + * Just check for 0. + */ + if (minsigstksz == 0) { + printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n"); + return; + } + + enough_size = minsigstksz * 2; + + altstack = alloc_altstack(enough_size); + printf("\tAllocate memory for altstack (%u bytes).\n", enough_size); + + /* + * Try setup_altstack() with a size which can not fit + * XTILEDATA. ARCH_REQ_XCOMP_PERM should fail. + */ + small_size = minsigstksz - xtiledata.size; + printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size); + setup_altstack(altstack, small_size, SUCCESS_EXPECTED); + validate_req_xcomp_perm(FAIL_EXPECTED); + + /* + * Try setup_altstack() with a size derived from + * AT_MINSIGSTKSZ. It should be more than large enough + * and thus ARCH_REQ_XCOMP_PERM should succeed. + */ + printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size); + setup_altstack(altstack, enough_size, SUCCESS_EXPECTED); + validate_req_xcomp_perm(SUCCESS_EXPECTED); + + /* + * Try to coerce setup_altstack() to again accept a + * too-small altstack. This ensures that big-enough + * sigaltstacks can not shrink to a too-small value + * once XTILEDATA permission is established. + */ + printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size); + setup_altstack(altstack, small_size, FAIL_EXPECTED); +} + +static void test_dynamic_state(void) +{ + pid_t parent, child, grandchild; + + parent = fork(); + if (parent < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (parent > 0) { + int status; + /* fork() succeeded. Now in the parent. */ + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("arch_prctl test parent exit"); + return; + } + /* fork() succeeded. Now in the child . */ + + printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n"); + + printf("\tFork a child.\n"); + child = fork(); + if (child < 0) { + fatal_error("fork"); + } else if (child > 0) { + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("arch_prctl test child exit"); + _exit(0); + } + + /* + * The permission request should fail without an + * XTILEDATA-compatible signal stack + */ + printf("\tTest XCOMP_PERM at child.\n"); + validate_xcomp_perm(FAIL_EXPECTED); + + /* + * Set up an XTILEDATA-compatible signal stack and + * also obtain permission to populate XTILEDATA. + */ + printf("\tTest dynamic sigaltstack at child:\n"); + test_dynamic_sigaltstack(); + + /* Ensure that XTILEDATA can be populated. */ + printf("\tTest XCOMP_PERM again at child.\n"); + validate_xcomp_perm(SUCCESS_EXPECTED); + + printf("\tFork a grandchild.\n"); + grandchild = fork(); + if (grandchild < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (!grandchild) { + /* fork() succeeded. Now in the (grand)child. */ + printf("\tTest XCOMP_PERM at grandchild.\n"); + + /* + * Ensure that the grandchild inherited + * permission and a compatible sigaltstack: + */ + validate_xcomp_perm(SUCCESS_EXPECTED); + } else { + int status; + /* fork() succeeded. Now in the parent. */ + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test grandchild"); + } + + _exit(0); +} + +/* + * Save current register state and compare it to @xbuf1.' + * + * Returns false if @xbuf1 matches the registers. + * Returns true if @xbuf1 differs from the registers. + */ +static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1) +{ + struct xsave_buffer *xbuf2; + int ret; + + xbuf2 = alloc_xbuf(); + if (!xbuf2) + fatal_error("failed to allocate XSAVE buffer\n"); + + xsave(xbuf2, XFEATURE_MASK_XTILEDATA); + ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], + &xbuf2->bytes[xtiledata.xbuf_offset], + xtiledata.size); + + free(xbuf2); + + if (ret == 0) + return false; + return true; +} + +static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf) +{ + int ret = __validate_tiledata_regs(xbuf); + + if (ret != 0) + fatal_error("TILEDATA registers changed"); +} + +static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf) +{ + int ret = __validate_tiledata_regs(xbuf); + + if (ret == 0) + fatal_error("TILEDATA registers did not change"); +} + +/* tiledata inheritance test */ + +static void test_fork(void) +{ + pid_t child, grandchild; + + child = fork(); + if (child < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (child > 0) { + /* fork() succeeded. Now in the parent. */ + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test child"); + return; + } + /* fork() succeeded. Now in the child. */ + printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n"); + + load_rand_tiledata(stashed_xsave); + + grandchild = fork(); + if (grandchild < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (grandchild > 0) { + /* fork() succeeded. Still in the first child. */ + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test grand child"); + _exit(0); + } + /* fork() succeeded. Now in the (grand)child. */ + + /* + * TILEDATA registers are not preserved across fork(). + * Ensure that their value has changed: + */ + validate_tiledata_regs_changed(stashed_xsave); + + _exit(0); +} + +/* Context switching test */ + +static struct _ctxtswtest_cfg { + unsigned int iterations; + unsigned int num_threads; +} ctxtswtest_config; + +struct futex_info { + pthread_t thread; + int nr; + pthread_mutex_t mutex; + struct futex_info *next; +}; + +static void *check_tiledata(void *info) +{ + struct futex_info *finfo = (struct futex_info *)info; + struct xsave_buffer *xbuf; + int i; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + /* + * Load random data into 'xbuf' and then restore + * it to the tile registers themselves. + */ + load_rand_tiledata(xbuf); + for (i = 0; i < ctxtswtest_config.iterations; i++) { + pthread_mutex_lock(&finfo->mutex); + + /* + * Ensure the register values have not + * diverged from those recorded in 'xbuf'. + */ + validate_tiledata_regs_same(xbuf); + + /* Load new, random values into xbuf and registers */ + load_rand_tiledata(xbuf); + + /* + * The last thread's last unlock will be for + * thread 0's mutex. However, thread 0 will + * have already exited the loop and the mutex + * will already be unlocked. + * + * Because this is not an ERRORCHECK mutex, + * that inconsistency will be silently ignored. + */ + pthread_mutex_unlock(&finfo->next->mutex); + } + + free(xbuf); + /* + * Return this thread's finfo, which is + * a unique value for this thread. + */ + return finfo; +} + +static int create_threads(int num, struct futex_info *finfo) +{ + int i; + + for (i = 0; i < num; i++) { + int next_nr; + + finfo[i].nr = i; + /* + * Thread 'i' will wait on this mutex to + * be unlocked. Lock it immediately after + * initialization: + */ + pthread_mutex_init(&finfo[i].mutex, NULL); + pthread_mutex_lock(&finfo[i].mutex); + + next_nr = (i + 1) % num; + finfo[i].next = &finfo[next_nr]; + + if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i])) + fatal_error("pthread_create()"); + } + return 0; +} + +static void affinitize_cpu0(void) +{ + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + fatal_error("sched_setaffinity to CPU 0"); +} + +static void test_context_switch(void) +{ + struct futex_info *finfo; + int i; + + /* Affinitize to one CPU to force context switches */ + affinitize_cpu0(); + + req_xtiledata_perm(); + + printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n", + ctxtswtest_config.iterations, + ctxtswtest_config.num_threads); + + + finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads); + if (!finfo) + fatal_error("malloc()"); + + create_threads(ctxtswtest_config.num_threads, finfo); + + /* + * This thread wakes up thread 0 + * Thread 0 will wake up 1 + * Thread 1 will wake up 2 + * ... + * the last thread will wake up 0 + * + * ... this will repeat for the configured + * number of iterations. + */ + pthread_mutex_unlock(&finfo[0].mutex); + + /* Wait for all the threads to finish: */ + for (i = 0; i < ctxtswtest_config.num_threads; i++) { + void *thread_retval; + int rc; + + rc = pthread_join(finfo[i].thread, &thread_retval); + + if (rc) + fatal_error("pthread_join() failed for thread %d err: %d\n", + i, rc); + + if (thread_retval != &finfo[i]) + fatal_error("unexpected thread retval for thread %d: %p\n", + i, thread_retval); + + } + + printf("[OK]\tNo incorrect case was found.\n"); + + free(finfo); +} + +int main(void) +{ + /* Check hardware availability at first */ + check_cpuid_xsave(); + check_cpuid_xtiledata(); + + init_stashed_xsave(); + sethandler(SIGILL, handle_noperm, 0); + + test_dynamic_state(); + + /* Request permission for the following tests */ + req_xtiledata_perm(); + + test_fork(); + + ctxtswtest_config.iterations = 10; + ctxtswtest_config.num_threads = 5; + test_context_switch(); + + clearhandler(SIGILL); + free_stashed_xsave(); + + return 0; +} diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c index bab2f6e06b63..7e3e09c1abac 100644 --- a/tools/testing/selftests/x86/iopl.c +++ b/tools/testing/selftests/x86/iopl.c @@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port) printf("[OK]\toutb to 0x%02hx failed\n", port); } -static bool try_cli(void) +#define RET_FAULTED 0 +#define RET_FAIL 1 +#define RET_EMUL 2 + +static int try_cli(void) { + unsigned long flags; + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); if (sigsetjmp(jmpbuf, 1) != 0) { - return false; + return RET_FAULTED; } else { - asm volatile ("cli"); - return true; + asm volatile("cli; pushf; pop %[flags]" + : [flags] "=rm" (flags)); + + /* X86_FLAGS_IF */ + if (!(flags & (1 << 9))) + return RET_FAIL; + else + return RET_EMUL; } clearhandler(SIGSEGV); } -static bool try_sti(void) +static int try_sti(bool irqs_off) { + unsigned long flags; + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); if (sigsetjmp(jmpbuf, 1) != 0) { - return false; + return RET_FAULTED; } else { - asm volatile ("sti"); - return true; + asm volatile("sti; pushf; pop %[flags]" + : [flags] "=rm" (flags)); + + /* X86_FLAGS_IF */ + if (irqs_off && (flags & (1 << 9))) + return RET_FAIL; + else + return RET_EMUL; } clearhandler(SIGSEGV); } -static void expect_gp_sti(void) +static void expect_gp_sti(bool irqs_off) { - if (try_sti()) { + int ret = try_sti(irqs_off); + + switch (ret) { + case RET_FAULTED: + printf("[OK]\tSTI faulted\n"); + break; + case RET_EMUL: + printf("[OK]\tSTI NOPped\n"); + break; + default: printf("[FAIL]\tSTI worked\n"); nerrs++; - } else { - printf("[OK]\tSTI faulted\n"); } } -static void expect_gp_cli(void) +/* + * Returns whether it managed to disable interrupts. + */ +static bool test_cli(void) { - if (try_cli()) { + int ret = try_cli(); + + switch (ret) { + case RET_FAULTED: + printf("[OK]\tCLI faulted\n"); + break; + case RET_EMUL: + printf("[OK]\tCLI NOPped\n"); + break; + default: printf("[FAIL]\tCLI worked\n"); nerrs++; - } else { - printf("[OK]\tCLI faulted\n"); + return true; } + + return false; } int main(void) @@ -152,8 +192,7 @@ int main(void) } /* Make sure that CLI/STI are blocked even with IOPL level 3 */ - expect_gp_cli(); - expect_gp_sti(); + expect_gp_sti(test_cli()); expect_ok_outb(0x80); /* Establish an I/O bitmap to test the restore */ @@ -204,8 +243,7 @@ int main(void) printf("[RUN]\tparent: write to 0x80 (should fail)\n"); expect_gp_outb(0x80); - expect_gp_cli(); - expect_gp_sti(); + expect_gp_sti(test_cli()); /* Test the capability checks. */ printf("\tiopl(3)\n"); diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 65c141ebfbbd..5b45e6986aea 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -497,7 +497,7 @@ static int test_process_vm_readv(void) } if (vsyscall_map_r) { - if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) { + if (!memcmp(buf, remote.iov_base, sizeof(buf))) { printf("[OK]\tIt worked and read correct data\n"); } else { printf("[FAIL]\tIt worked but returned incorrect data\n"); diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index cec6f5a738e1..fa927ad16f8a 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -332,8 +332,6 @@ static void test_no_sockets(const struct test_opts *opts) read_vsock_stat(&sockets); check_no_sockets(&sockets); - - free_sock_stat(&sockets); } static void test_listen_socket_server(const struct test_opts *opts) diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index 3a2e6bb781a8..59a7f2346eab 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1538,7 +1538,7 @@ static void tracing_loop(void) mutex_lock(&print_mtx); check_signals(); write_or_die(fd_stdout, queue_full_warning, - sizeof(queue_full_warning)); + strlen(queue_full_warning)); mutex_unlock(&print_mtx); } modified--; diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index f62f10c988db..b1ed76d9a979 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -390,7 +390,7 @@ static void show_page_range(unsigned long voffset, unsigned long offset, if (opt_pid) printf("%lx\t", voff); if (opt_file) - printf("%lu\t", voff); + printf("%lx\t", voff); if (opt_list_cgroup) printf("@%llu\t", (unsigned long long)cgroup0); if (opt_list_mapcnt) @@ -418,7 +418,7 @@ static void show_page(unsigned long voffset, unsigned long offset, if (opt_pid) printf("%lx\t", voffset); if (opt_file) - printf("%lu\t", voffset); + printf("%lx\t", voffset); if (opt_list_cgroup) printf("@%llu\t", (unsigned long long)cgroup); if (opt_list_mapcnt) @@ -967,22 +967,19 @@ static struct sigaction sigbus_action = { .sa_flags = SA_SIGINFO, }; -static void walk_file(const char *name, const struct stat *st) +static void walk_file_range(const char *name, int fd, + unsigned long off, unsigned long end) { uint8_t vec[PAGEMAP_BATCH]; uint64_t buf[PAGEMAP_BATCH], flags; uint64_t cgroup = 0; uint64_t mapcnt = 0; unsigned long nr_pages, pfn, i; - off_t off, end = st->st_size; - int fd; ssize_t len; void *ptr; int first = 1; - fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW); - - for (off = 0; off < end; off += len) { + for (; off < end; off += len) { nr_pages = (end - off + page_size - 1) / page_size; if (nr_pages > PAGEMAP_BATCH) nr_pages = PAGEMAP_BATCH; @@ -1037,12 +1034,26 @@ got_sigbus: if (first && opt_list) { first = 0; flush_page_range(); - show_file(name, st); } add_page(off / page_size + i, pfn, flags, cgroup, mapcnt, buf[i]); } } +} + +static void walk_file(const char *name, const struct stat *st) +{ + int i; + int fd; + + fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW); + + if (!nr_addr_ranges) + add_addr_range(0, st->st_size / page_size); + + for (i = 0; i < nr_addr_ranges; i++) + walk_file_range(name, fd, opt_offset[i] * page_size, + (opt_offset[i] + opt_size[i]) * page_size); close(fd); } @@ -1062,10 +1073,10 @@ int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f) return 0; } +struct stat st; + static void walk_page_cache(void) { - struct stat st; - kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY); sigaction(SIGBUS, &sigbus_action, NULL); @@ -1362,6 +1373,11 @@ int main(int argc, char *argv[]) if (opt_list) printf("\n\n"); + if (opt_file) { + show_file(opt_file, &st); + printf("\n"); + } + show_summary(); if (opt_list_mapcnt) diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index 0e75f22c9475..9ebb84a9c731 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -5,6 +5,8 @@ * Example use: * cat /sys/kernel/debug/page_owner > page_owner_full.txt * ./page_owner_sort page_owner_full.txt sorted_page_owner.txt + * Or sort by total memory: + * ./page_owner_sort -m page_owner_full.txt sorted_page_owner.txt * * See Documentation/vm/page_owner.rst */ @@ -16,14 +18,18 @@ #include <fcntl.h> #include <unistd.h> #include <string.h> +#include <regex.h> +#include <errno.h> struct block_list { char *txt; int len; int num; + int page_num; }; - +static int sort_by_memory; +static regex_t order_pattern; static struct block_list *list; static int list_size; static int max_size; @@ -59,12 +65,50 @@ static int compare_num(const void *p1, const void *p2) return l2->num - l1->num; } +static int compare_page_num(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l2->page_num - l1->page_num; +} + +static int get_page_num(char *buf) +{ + int err, val_len, order_val; + char order_str[4] = {0}; + char *endptr; + regmatch_t pmatch[2]; + + err = regexec(&order_pattern, buf, 2, pmatch, REG_NOTBOL); + if (err != 0 || pmatch[1].rm_so == -1) { + printf("no order pattern in %s\n", buf); + return 0; + } + val_len = pmatch[1].rm_eo - pmatch[1].rm_so; + if (val_len > 2) /* max_order should not exceed 2 digits */ + goto wrong_order; + + memcpy(order_str, buf + pmatch[1].rm_so, val_len); + + errno = 0; + order_val = strtol(order_str, &endptr, 10); + if (errno != 0 || endptr == order_str || *endptr != '\0') + goto wrong_order; + + return 1 << order_val; + +wrong_order: + printf("wrong order in follow buf:\n%s\n", buf); + return 0; +} + static void add_list(char *buf, int len) { if (list_size != 0 && len == list[list_size-1].len && memcmp(buf, list[list_size-1].txt, len) == 0) { list[list_size-1].num++; + list[list_size-1].page_num += get_page_num(buf); return; } if (list_size == max_size) { @@ -74,6 +118,7 @@ static void add_list(char *buf, int len) list[list_size].txt = malloc(len+1); list[list_size].len = len; list[list_size].num = 1; + list[list_size].page_num = get_page_num(buf); memcpy(list[list_size].txt, buf, len); list[list_size].txt[len] = 0; list_size++; @@ -85,6 +130,13 @@ static void add_list(char *buf, int len) #define BUF_SIZE (128 * 1024) +static void usage(void) +{ + printf("Usage: ./page_owner_sort [-m] <input> <output>\n" + "-m Sort by total memory. If this option is unset, sort by times\n" + ); +} + int main(int argc, char **argv) { FILE *fin, *fout; @@ -92,21 +144,39 @@ int main(int argc, char **argv) int ret, i, count; struct block_list *list2; struct stat st; + int err; + int opt; - if (argc < 3) { - printf("Usage: ./program <input> <output>\n"); - perror("open: "); + while ((opt = getopt(argc, argv, "m")) != -1) + switch (opt) { + case 'm': + sort_by_memory = 1; + break; + default: + usage(); + exit(1); + } + + if (optind >= (argc - 1)) { + usage(); exit(1); } - fin = fopen(argv[1], "r"); - fout = fopen(argv[2], "w"); + fin = fopen(argv[optind], "r"); + fout = fopen(argv[optind + 1], "w"); if (!fin || !fout) { - printf("Usage: ./program <input> <output>\n"); + usage(); perror("open: "); exit(1); } + err = regcomp(&order_pattern, "order\\s*([0-9]*),", REG_EXTENDED|REG_NEWLINE); + if (err != 0 || order_pattern.re_nsub != 1) { + printf("%s: Invalid pattern 'order\\s*([0-9]*),' code %d\n", + argv[0], err); + exit(1); + } + fstat(fileno(fin), &st); max_size = st.st_size / 100; /* hack ... */ @@ -145,13 +215,19 @@ int main(int argc, char **argv) list2[count++] = list[i]; } else { list2[count-1].num += list[i].num; + list2[count-1].page_num += list[i].page_num; } } - qsort(list2, count, sizeof(list[0]), compare_num); + if (sort_by_memory) + qsort(list2, count, sizeof(list[0]), compare_page_num); + else + qsort(list2, count, sizeof(list[0]), compare_num); for (i = 0; i < count; i++) - fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt); + fprintf(fout, "%d times, %d pages:\n%s\n", + list2[i].num, list2[i].page_num, list2[i].txt); + regfree(&order_pattern); return 0; } |