diff options
Diffstat (limited to 'samples')
26 files changed, 973 insertions, 104 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6c7468eb3684..9c650589e80f 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example +hostprogs-y += load_sock_ops # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o @@ -111,6 +113,12 @@ always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o always += test_map_in_map_kern.o always += cookie_uid_helper_example.o +always += tcp_synrto_kern.o +always += tcp_rwnd_kern.o +always += tcp_bufs_kern.o +always += tcp_cong_kern.o +always += tcp_iw_kern.o +always += tcp_clamp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -130,6 +138,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf +HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf @@ -160,6 +169,17 @@ clean: $(MAKE) -C ../../ M=$(CURDIR) clean @rm -f *~ +$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c + $(call if_changed_dep,cc_s_c) + +$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE + $(call filechk,offsets,__SYSCALL_NRS_H__) + +clean-files += syscall_nrs.h + +FORCE: + + # Verify LLVM compiler tools are available and bpf target is supported by llc .PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) @@ -180,11 +200,13 @@ verify_target_bpf: verify_cmds $(src)/*.c: verify_target_bpf +$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. $(obj)/%.o: $(src)/%.c - $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 9a9c95f2c9fb..d50ac342dc92 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -31,7 +31,8 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) = (void *) BPF_FUNC_get_current_uid_gid; static int (*bpf_get_current_comm)(void *buf, int buf_size) = (void *) BPF_FUNC_get_current_comm; -static int (*bpf_perf_event_read)(void *map, int index) = +static unsigned long long (*bpf_perf_event_read)(void *map, + unsigned long long flags) = (void *) BPF_FUNC_perf_event_read; static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; @@ -59,6 +60,9 @@ static unsigned long long (*bpf_get_prandom_u32)(void) = (void *) BPF_FUNC_get_prandom_u32; static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; +static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, + int optlen) = + (void *) BPF_FUNC_setsockopt; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -135,6 +139,19 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->pc) +#elif defined(__mips__) + +#define PT_REGS_PARM1(x) ((x)->regs[4]) +#define PT_REGS_PARM2(x) ((x)->regs[5]) +#define PT_REGS_PARM3(x) ((x)->regs[6]) +#define PT_REGS_PARM4(x) ((x)->regs[7]) +#define PT_REGS_PARM5(x) ((x)->regs[8]) +#define PT_REGS_RET(x) ((x)->regs[31]) +#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[1]) +#define PT_REGS_SP(x) ((x)->regs[29]) +#define PT_REGS_IP(x) ((x)->cp0_epc) + #elif defined(__powerpc__) #define PT_REGS_PARM1(x) ((x)->gpr[3]) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 74456b3eb89a..899f40310bc3 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + bool is_sockops = strncmp(event, "sockops", 7) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_CGROUP_SKB; } else if (is_cgroup_sk) { prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + } else if (is_sockops) { + prog_type = BPF_PROG_TYPE_SOCK_OPS; } else { printf("Unknown event '%s'\n", event); return -1; @@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; - if (is_socket) { - event += 6; + if (is_socket || is_sockops) { + if (is_socket) + event += 6; + else + event += 7; if (*event != '/') return 0; event++; @@ -516,16 +522,18 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) processed_sec[maps_shndx] = true; } - /* load programs that need map fixup (relocations) */ + /* process all relo sections, and rewrite bpf insns for maps */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) continue; if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) continue; + if (shdr.sh_type == SHT_REL) { struct bpf_insn *insns; + /* locate prog sec that need map fixup (relocations) */ if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, &shdr_prog, &data_prog)) continue; @@ -535,26 +543,15 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) continue; insns = (struct bpf_insn *) data_prog->d_buf; - - processed_sec[shdr.sh_info] = true; - processed_sec[i] = true; + processed_sec[i] = true; /* relo section */ if (parse_relo_and_apply(data, symbols, &shdr, insns, map_data, nr_maps)) continue; - - if (memcmp(shname_prog, "kprobe/", 7) == 0 || - memcmp(shname_prog, "kretprobe/", 10) == 0 || - memcmp(shname_prog, "tracepoint/", 11) == 0 || - memcmp(shname_prog, "xdp", 3) == 0 || - memcmp(shname_prog, "perf_event", 10) == 0 || - memcmp(shname_prog, "socket", 6) == 0 || - memcmp(shname_prog, "cgroup/", 7) == 0) - load_and_attach(shname_prog, insns, data_prog->d_size); } } - /* load programs that don't use maps */ + /* load programs */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) @@ -569,8 +566,13 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0 || - memcmp(shname, "cgroup/", 7) == 0) - load_and_attach(shname, data->d_buf, data->d_size); + memcmp(shname, "cgroup/", 7) == 0 || + memcmp(shname, "sockops", 7) == 0) { + ret = load_and_attach(shname, data->d_buf, + data->d_size); + if (ret != 0) + goto done; + } } ret = 0; diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c new file mode 100644 index 000000000000..e5da6cf71a3e --- /dev/null +++ b/samples/bpf/load_sock_ops.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> + +static void usage(char *pname) +{ + printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname); + printf("\tLoad and attach a sock_ops program to the specified " + "cgroup\n"); + printf("\tIf \"-l\" is used, the program will continue to run\n"); + printf("\tprinting the BPF log buffer\n"); + printf("\tIf the specified filename does not end in \".o\", it\n"); + printf("\tappends \"_kern.o\" to the name\n"); + printf("\n"); + printf(" %s -r <cg-path>\n", pname); + printf("\tDetaches the currently attached sock_ops program\n"); + printf("\tfrom the specified cgroup\n"); + printf("\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int logFlag = 0; + int error = 0; + char *cg_path; + char fn[500]; + char *prog; + int cg_fd; + + if (argc < 3) + usage(argv[0]); + + if (!strcmp(argv[1], "-r")) { + cg_path = argv[2]; + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + if (error) { + printf("ERROR: bpf_prog_detach: %d (%s)\n", + error, strerror(errno)); + return 2; + } + return 0; + } else if (!strcmp(argv[1], "-h")) { + usage(argv[0]); + } else if (!strcmp(argv[1], "-l")) { + logFlag = 1; + if (argc < 4) + usage(argv[0]); + } + + prog = argv[argc - 1]; + cg_path = argv[argc - 2]; + if (strlen(prog) > 480) { + fprintf(stderr, "ERROR: program name too long (> 480 chars)\n"); + return 3; + } + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + + if (!strcmp(prog + strlen(prog)-2, ".o")) + strcpy(fn, prog); + else + sprintf(fn, "%s_kern.o", prog); + if (logFlag) + printf("loading bpf file:%s\n", fn); + if (load_bpf_file(fn)) { + printf("ERROR: load_bpf_file failed for: %s\n", fn); + printf("%s", bpf_log_buf); + return 4; + } + if (logFlag) + printf("TCP BPF Loaded %s\n", fn); + + error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (error) { + printf("ERROR: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + return 5; + } else if (logFlag) { + read_trace_pipe(); + } + + return error; +} diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index b5524d417eb5..877ecf8fc5ac 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -8,6 +8,10 @@ #include <arpa/inet.h> #include <sys/resource.h> +#define PARSE_IP 3 +#define PARSE_IP_PROG_FD (prog_fd[0]) +#define PROG_ARRAY_FD (map_fd[0]) + struct bpf_flow_keys { __be32 src; __be32 dst; @@ -28,7 +32,9 @@ int main(int argc, char **argv) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; - int i, sock; + int i, sock, err, id, key = PARSE_IP; + struct bpf_prog_info info = {}; + uint32_t info_len = sizeof(info); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); setrlimit(RLIMIT_MEMLOCK, &r); @@ -38,6 +44,13 @@ int main(int argc, char **argv) return 1; } + /* Test fd array lookup which returns the id of the bpf_prog */ + err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len); + assert(!err); + err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id); + assert(!err); + assert(id == info.id); + sock = open_raw_sock("lo"); assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4], diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c new file mode 100644 index 000000000000..ce2a30b11248 --- /dev/null +++ b/samples/bpf/syscall_nrs.c @@ -0,0 +1,12 @@ +#include <uapi/linux/unistd.h> +#include <linux/kbuild.h> + +#define SYSNR(_NR) DEFINE(SYS ## _NR, _NR) + +void syscall_defines(void) +{ + COMMENT("Linux system call numbers."); + SYSNR(__NR_write); + SYSNR(__NR_read); + SYSNR(__NR_mmap); +} diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c new file mode 100644 index 000000000000..ee83bbabd17c --- /dev/null +++ b/samples/bpf/tcp_bufs_kern.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets and send + * and receive buffers to 1.5MB. This would usually be done after + * doing appropriate checks that indicate the hosts are far enough + * away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_bufs(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Nothing to do */ + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c new file mode 100644 index 000000000000..d68eadd9ca2d --- /dev/null +++ b/samples/bpf/tcp_clamp_kern.c @@ -0,0 +1,102 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp + * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within + * the same datacenter. For his example, we assume they are within the same + * datacenter when the first 5.5 bytes of their IPv6 addresses are the same. + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_clamp(struct bpf_sock_ops *skops) +{ + int bufsize = 150000; + int to_init = 10; + int clamp = 100; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check that both hosts are within same datacenter. For this example + * it is the case when the first 5.5 bytes of their IPv6 addresses are + * the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + rv = to_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, + &bufsize, sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c new file mode 100644 index 000000000000..dac15bce1fa9 --- /dev/null +++ b/samples/bpf/tcp_cong_kern.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set congestion control to dctcp when both hosts are + * in the same datacenter (as deteremined by IPv6 prefix). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_cong(struct bpf_sock_ops *skops) +{ + char cong[] = "dctcp"; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check if both hosts are in the same datacenter. For this + * example they are if the 1st 5.5 bytes in the IPv6 address + * are the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_NEEDS_ECN: + rv = 1; + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c new file mode 100644 index 000000000000..23c5122ef819 --- /dev/null +++ b/samples/bpf/tcp_iw_kern.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial congestion window and initial receive + * window to 40 packets and send and receive buffers to 1.5MB. This + * would usually be done after doing appropriate checks that indicate + * the hosts are far enough away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_iw(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int iw = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, + sizeof(iw)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c new file mode 100644 index 000000000000..3f2a228f81ce --- /dev/null +++ b/samples/bpf/tcp_rwnd_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets when using IPv6 + * and the first 5.5 bytes of the IPv6 addresses are not the same (in this + * example that means both hosts are not the same datacenter). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_rwnd(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != + 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check for RWND_INIT operation and IPv6 addresses */ + if (op == BPF_SOCK_OPS_RWND_INIT && + skops->family == AF_INET6) { + + /* If the first 5.5 bytes of the IPv6 address are not the same + * then both hosts are not in the same datacenter + * so use a larger initial advertized window (40 packets) + */ + if (skops->local_ip6[0] != skops->remote_ip6[0] || + (bpf_ntohl(skops->local_ip6[1]) & 0xfffff000) != + (bpf_ntohl(skops->remote_ip6[1]) & 0xfffff000)) + rv = 40; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c new file mode 100644 index 000000000000..3c3fc83d81cb --- /dev/null +++ b/samples/bpf/tcp_synrto_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses + * and the first 5.5 bytes of the IPv6 addresses are the same (in this example + * that means both hosts are in the same datacenter). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_synrto(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check for TIMEOUT_INIT operation and IPv6 addresses */ + if (op == BPF_SOCK_OPS_TIMEOUT_INIT && + skops->family == AF_INET6) { + + /* If the first 5.5 bytes of the IPv6 address are the same + * then both hosts are in the same datacenter + * so use an RTO of 10ms + */ + if (skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) + rv = 10; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index f62fdc2bd428..1aca18539d8d 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -32,6 +32,20 @@ static const char * const test_names[] = { #define NR_TESTS (sizeof(test_names) / sizeof(*test_names)) +static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key) +{ + struct bpf_map_info info = {}; + uint32_t info_len = sizeof(info); + int ret, id; + + ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len); + assert(!ret); + + ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id); + assert(!ret); + assert(id == info.id); +} + static void populate_map(uint32_t port_key, int magic_result) { int ret; @@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result) ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY); assert(!ret); + check_map_id(PORT_A, A_OF_PORT_A, port_key); ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST); assert(!ret); + check_map_id(PORT_A, H_OF_PORT_A, port_key); ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST); assert(!ret); + check_map_id(PORT_H, H_OF_PORT_H, port_key); } static void test_map_in_map(void) diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index fa4336423da5..7bd827b84a67 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -75,7 +75,10 @@ static void print_stack(struct key_t *key, __u64 count) for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_addr(ip[i]); } - printf("\n"); + if (count < 6) + printf("\r"); + else + printf("\n"); if (key->kernstack == -EEXIST && !warned) { printf("stackmap collisions seen. Consider increasing size\n"); @@ -105,7 +108,7 @@ static void print_stacks(void) bpf_map_delete_elem(fd, &next_key); key = next_key; } - + printf("\n"); if (!sys_read_seen || !sys_write_seen) { printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); int_exit(0); @@ -122,24 +125,29 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr) { int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); int *pmu_fd = malloc(nr_cpus * sizeof(int)); - int i; + int i, error = 0; /* open perf_event on all cpus */ for (i = 0; i < nr_cpus; i++) { pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0); if (pmu_fd[i] < 0) { printf("sys_perf_event_open failed\n"); + error = 1; goto all_cpu_err; } assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); - assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0); } - system("dd if=/dev/zero of=/dev/null count=5000k"); + system("dd if=/dev/zero of=/dev/null count=5000k status=none"); print_stacks(); all_cpu_err: - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); close(pmu_fd[i]); + } free(pmu_fd); + if (error) + int_exit(0); } static void test_perf_event_task(struct perf_event_attr *attr) @@ -150,12 +158,13 @@ static void test_perf_event_task(struct perf_event_attr *attr) pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0); if (pmu_fd < 0) { printf("sys_perf_event_open failed\n"); - return; + int_exit(0); } assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); - assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); - system("dd if=/dev/zero of=/dev/null count=5000k"); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0); + system("dd if=/dev/zero of=/dev/null count=5000k status=none"); print_stacks(); + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); close(pmu_fd); } @@ -175,11 +184,56 @@ static void test_bpf_perf_event(void) .config = PERF_COUNT_SW_CPU_CLOCK, .inherit = 1, }; + struct perf_event_attr attr_hw_cache_l1d = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + .inherit = 1, + }; + struct perf_event_attr attr_hw_cache_branch_miss = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HW_CACHE, + .config = + PERF_COUNT_HW_CACHE_BPU | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + .inherit = 1, + }; + struct perf_event_attr attr_type_raw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_RAW, + /* Intel Instruction Retired */ + .config = 0xc0, + .inherit = 1, + }; + printf("Test HW_CPU_CYCLES\n"); test_perf_event_all_cpu(&attr_type_hw); test_perf_event_task(&attr_type_hw); + + printf("Test SW_CPU_CLOCK\n"); test_perf_event_all_cpu(&attr_type_sw); test_perf_event_task(&attr_type_sw); + + printf("Test HW_CACHE_L1D\n"); + test_perf_event_all_cpu(&attr_hw_cache_l1d); + test_perf_event_task(&attr_hw_cache_l1d); + + printf("Test HW_CACHE_BPU\n"); + test_perf_event_all_cpu(&attr_hw_cache_branch_miss); + test_perf_event_task(&attr_hw_cache_branch_miss); + + printf("Test Instruction Retired\n"); + test_perf_event_all_cpu(&attr_type_raw); + test_perf_event_task(&attr_type_raw); + + printf("*** PASS ***\n"); } @@ -209,7 +263,6 @@ int main(int argc, char **argv) return 0; } test_bpf_perf_event(); - int_exit(0); return 0; } diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index 7e4cf74553ff..f57f4e1ea1ec 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -9,6 +9,7 @@ #include <uapi/linux/bpf.h> #include <uapi/linux/seccomp.h> #include <uapi/linux/unistd.h> +#include "syscall_nrs.h" #include "bpf_helpers.h" #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F @@ -17,7 +18,11 @@ struct bpf_map_def SEC("maps") progs = { .type = BPF_MAP_TYPE_PROG_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(u32), +#ifdef __mips__ + .max_entries = 6000, /* MIPS n64 syscalls start at 5000 */ +#else .max_entries = 1024, +#endif }; SEC("kprobe/__seccomp_filter") @@ -37,7 +42,7 @@ int bpf_prog1(struct pt_regs *ctx) } /* we jump here when syscall number == __NR_write */ -PROG(__NR_write)(struct pt_regs *ctx) +PROG(SYS__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; @@ -50,7 +55,7 @@ PROG(__NR_write)(struct pt_regs *ctx) return 0; } -PROG(__NR_read)(struct pt_regs *ctx) +PROG(SYS__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; @@ -63,7 +68,7 @@ PROG(__NR_read)(struct pt_regs *ctx) return 0; } -PROG(__NR_mmap)(struct pt_regs *ctx) +PROG(SYS__NR_mmap)(struct pt_regs *ctx) { char fmt[] = "mmap\n"; bpf_trace_printk(fmt, sizeof(fmt)); diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c index be479c4af9e2..e7d180305974 100644 --- a/samples/bpf/tracex6_kern.c +++ b/samples/bpf/tracex6_kern.c @@ -3,22 +3,36 @@ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") my_map = { +struct bpf_map_def SEC("maps") counters = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), - .max_entries = 32, + .max_entries = 64, +}; +struct bpf_map_def SEC("maps") values = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(u64), + .max_entries = 64, }; -SEC("kprobe/sys_write") +SEC("kprobe/htab_map_get_next_key") int bpf_prog1(struct pt_regs *ctx) { - u64 count; u32 key = bpf_get_smp_processor_id(); - char fmt[] = "CPU-%d %llu\n"; + u64 count, *val; + s64 error; + + count = bpf_perf_event_read(&counters, key); + error = (s64)count; + if (error <= -2 && error >= -22) + return 0; - count = bpf_perf_event_read(&my_map, key); - bpf_trace_printk(fmt, sizeof(fmt), key, count); + val = bpf_map_lookup_elem(&values, &key); + if (val) + *val = count; + else + bpf_map_update_elem(&values, &key, &count, BPF_NOEXIST); return 0; } diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index ca7874ed77f4..a05a99a0752f 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -1,73 +1,177 @@ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> +#define _GNU_SOURCE + +#include <assert.h> #include <fcntl.h> -#include <poll.h> -#include <sys/ioctl.h> #include <linux/perf_event.h> #include <linux/bpf.h> -#include "libbpf.h" +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + #include "bpf_load.h" +#include "libbpf.h" #include "perf-sys.h" #define SAMPLE_PERIOD 0x7fffffffffffffffULL -static void test_bpf_perf_event(void) +static void check_on_cpu(int cpu, struct perf_event_attr *attr) { - int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - int *pmu_fd = malloc(nr_cpus * sizeof(int)); - int status, i; + int pmu_fd, error = 0; + cpu_set_t set; + __u64 value; - struct perf_event_attr attr_insn_pmu = { + /* Move to target CPU */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + assert(sched_setaffinity(0, sizeof(set), &set) == 0); + /* Open perf event and attach to the perf_event_array */ + pmu_fd = sys_perf_event_open(attr, -1/*pid*/, cpu/*cpu*/, -1/*group_fd*/, 0); + if (pmu_fd < 0) { + fprintf(stderr, "sys_perf_event_open failed on CPU %d\n", cpu); + error = 1; + goto on_exit; + } + assert(bpf_map_update_elem(map_fd[0], &cpu, &pmu_fd, BPF_ANY) == 0); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); + /* Trigger the kprobe */ + bpf_map_get_next_key(map_fd[1], &cpu, NULL); + /* Check the value */ + if (bpf_map_lookup_elem(map_fd[1], &cpu, &value)) { + fprintf(stderr, "Value missing for CPU %d\n", cpu); + error = 1; + goto on_exit; + } + fprintf(stderr, "CPU %d: %llu\n", cpu, value); + +on_exit: + assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE, 0) == 0 || error); + assert(close(pmu_fd) == 0 || error); + assert(bpf_map_delete_elem(map_fd[1], &cpu) == 0 || error); + exit(error); +} + +static void test_perf_event_array(struct perf_event_attr *attr, + const char *name) +{ + int i, status, nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + pid_t pid[nr_cpus]; + int err = 0; + + printf("Test reading %s counters\n", name); + + for (i = 0; i < nr_cpus; i++) { + pid[i] = fork(); + assert(pid[i] >= 0); + if (pid[i] == 0) { + check_on_cpu(i, attr); + exit(1); + } + } + + for (i = 0; i < nr_cpus; i++) { + assert(waitpid(pid[i], &status, 0) == pid[i]); + err |= status; + } + + if (err) + printf("Test: %s FAILED\n", name); +} + +static void test_bpf_perf_event(void) +{ + struct perf_event_attr attr_cycles = { .freq = 0, .sample_period = SAMPLE_PERIOD, .inherit = 0, .type = PERF_TYPE_HARDWARE, .read_format = 0, .sample_type = 0, - .config = 0,/* PMU: cycles */ + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + struct perf_event_attr attr_clock = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_SOFTWARE, + .read_format = 0, + .sample_type = 0, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + struct perf_event_attr attr_raw = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_RAW, + .read_format = 0, + .sample_type = 0, + /* Intel Instruction Retired */ + .config = 0xc0, + }; + struct perf_event_attr attr_l1d_load = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HW_CACHE, + .read_format = 0, + .sample_type = 0, + .config = + PERF_COUNT_HW_CACHE_L1D | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + }; + struct perf_event_attr attr_llc_miss = { + .freq = 0, + .sample_period = SAMPLE_PERIOD, + .inherit = 0, + .type = PERF_TYPE_HW_CACHE, + .read_format = 0, + .sample_type = 0, + .config = + PERF_COUNT_HW_CACHE_LL | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), + }; + struct perf_event_attr attr_msr_tsc = { + .freq = 0, + .sample_period = 0, + .inherit = 0, + /* From /sys/bus/event_source/devices/msr/ */ + .type = 7, + .read_format = 0, + .sample_type = 0, + .config = 0, }; - for (i = 0; i < nr_cpus; i++) { - pmu_fd[i] = sys_perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0); - if (pmu_fd[i] < 0) { - printf("event syscall failed\n"); - goto exit; - } - - bpf_map_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY); - ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); - } + test_perf_event_array(&attr_cycles, "HARDWARE-cycles"); + test_perf_event_array(&attr_clock, "SOFTWARE-clock"); + test_perf_event_array(&attr_raw, "RAW-instruction-retired"); + test_perf_event_array(&attr_l1d_load, "HW_CACHE-L1D-load"); - status = system("ls > /dev/null"); - if (status) - goto exit; - status = system("sleep 2"); - if (status) - goto exit; - -exit: - for (i = 0; i < nr_cpus; i++) - close(pmu_fd[i]); - close(map_fd[0]); - free(pmu_fd); + /* below tests may fail in qemu */ + test_perf_event_array(&attr_llc_miss, "HW_CACHE-LLC-miss"); + test_perf_event_array(&attr_msr_tsc, "Dynamic-msr-tsc"); } int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; } test_bpf_perf_event(); - read_trace_pipe(); - return 0; } diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst index 8365c4e5c513..ff8929da61c5 100644 --- a/samples/pktgen/README.rst +++ b/samples/pktgen/README.rst @@ -21,7 +21,9 @@ across the sample scripts. Usage example is printed on errors:: -d : ($DEST_IP) destination IP -m : ($DST_MAC) destination MAC-addr -t : ($THREADS) threads to start + -f : ($F_THREAD) index of first thread (zero indexed CPU number) -c : ($SKB_CLONE) SKB clones send before alloc new SKB + -n : ($COUNT) num messages to send per thread, 0 means indefinitely -b : ($BURST) HW level bursting of SKBs -v : ($VERBOSE) verbose -x : ($DEBUG) debug diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh index f70ea7dd5660..3a6244d5f47a 100644 --- a/samples/pktgen/parameters.sh +++ b/samples/pktgen/parameters.sh @@ -10,7 +10,9 @@ function usage() { echo " -d : (\$DEST_IP) destination IP" echo " -m : (\$DST_MAC) destination MAC-addr" echo " -t : (\$THREADS) threads to start" + echo " -f : (\$F_THREAD) index of first thread (zero indexed CPU number)" echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB" + echo " -n : (\$COUNT) num messages to send per thread, 0 means indefinitely" echo " -b : (\$BURST) HW level bursting of SKBs" echo " -v : (\$VERBOSE) verbose" echo " -x : (\$DEBUG) debug" @@ -20,7 +22,7 @@ function usage() { ## --- Parse command line arguments / parameters --- ## echo "Commandline options:" -while getopts "s:i:d:m:t:c:b:vxh6" option; do +while getopts "s:i:d:m:f:t:c:n:b:vxh6" option; do case $option in i) # interface export DEV=$OPTARG @@ -38,16 +40,22 @@ while getopts "s:i:d:m:t:c:b:vxh6" option; do export DST_MAC=$OPTARG info "Destination MAC set to: DST_MAC=$DST_MAC" ;; + f) + export F_THREAD=$OPTARG + info "Index of first thread (zero indexed CPU number): $F_THREAD" + ;; t) export THREADS=$OPTARG - export CPU_THREADS=$OPTARG - let "CPU_THREADS -= 1" - info "Number of threads to start: $THREADS (0 to $CPU_THREADS)" + info "Number of threads to start: $THREADS" ;; c) export CLONE_SKB=$OPTARG info "CLONE_SKB=$CLONE_SKB" ;; + n) + export COUNT=$OPTARG + info "COUNT=$COUNT" + ;; b) export BURST=$OPTARG info "SKB bursting: BURST=$BURST" @@ -77,12 +85,17 @@ if [ -z "$PKT_SIZE" ]; then info "Default packet size set to: set to: $PKT_SIZE bytes" fi +if [ -z "$F_THREAD" ]; then + # First thread (F_THREAD) reference the zero indexed CPU number + export F_THREAD=0 +fi + if [ -z "$THREADS" ]; then - # Zero CPU threads means one thread, because CPU numbers are zero indexed - export CPU_THREADS=0 export THREADS=1 fi +export L_THREAD=$(( THREADS + F_THREAD - 1 )) + if [ -z "$DEV" ]; then usage err 2 "Please specify output device" diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index f3e1bedfd77f..e5bfe759a0fb 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -39,16 +39,16 @@ if [ -z "$DEST_IP" ]; then fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=1024 +[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="10000000" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # The device name is extended with @name, using thread number to # make then unique, but any name will do. dev=${DEV}@${thread} @@ -81,7 +81,7 @@ pg_ctrl "start" echo "Done" >&2 # Print results -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index cc102e923241..1ad878e95539 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -22,16 +22,16 @@ fi if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" fi +[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="10000000" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # The device name is extended with @name, using thread number to # make then unique, but any name will do. dev=${DEV}@${thread} @@ -61,7 +61,7 @@ pg_ctrl "start" echo "Done" >&2 # Print results -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index 29ef4ba50796..35b7fe34bda2 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -20,10 +20,10 @@ fi [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Example enforce param "-m" for dst_mac [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac" +[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="100000" # Zero means indefinitely # Flow variation random source port between min and max UDP_MIN=9 diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index c88a161d3e6f..cbdd3e2bceff 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -13,9 +13,10 @@ root_check_run_with_sudo "$@" # Required param: -i dev in $DEV source ${basedir}/parameters.sh +[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely + # Base Config DELAY="0" # Zero means max speed -COUNT="100000" # Zero means indefinitely [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Flow variation random source port between min and max @@ -32,7 +33,7 @@ fi pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # The device name is extended with @name, using thread number to # make then unique, but any name will do. dev=${DEV}@${thread} @@ -70,7 +71,7 @@ pg_ctrl "start" echo "Done" >&2 # Print results -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index 80cf8f5ba6b2..8d26e0ca683d 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -31,16 +31,16 @@ fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=32 [ -z "$CLONE_SKB" ] && CLONE_SKB="100000" +[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="0" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} # Add remove all other devices and add_device $dev to thread @@ -71,7 +71,7 @@ done # Run if user hits control-c function control_c() { # Print results - for ((thread = 0; thread < $THREADS; thread++)); do + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index f60412e445bb..497fb7520464 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -15,6 +15,7 @@ source ${basedir}/parameters.sh [ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" +[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # NOTICE: Script specific settings # ======= @@ -26,7 +27,6 @@ source ${basedir}/parameters.sh # Base Config DELAY="0" # Zero means max speed -COUNT="0" # Zero means indefinitely if [[ -n "$BURST" ]]; then err 1 "Bursting not supported for this mode" @@ -36,7 +36,7 @@ fi pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} # Add remove all other devices and add_device $dev to thread @@ -78,7 +78,7 @@ done # Run if user hits control-c function print_result() { # Print results - for ((thread = 0; thread < $THREADS; thread++)); do + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 32ad818e2829..ac9cfd6b2c0a 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -20,17 +20,17 @@ source ${basedir}/parameters.sh [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$BURST" ] && BURST=32 +[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely # Base Config DELAY="0" # Zero means max speed -COUNT="0" # Zero means indefinitely # General cleanup everything since last run pg_ctrl "reset" # Threads are specified with parameter -t value in $THREADS -for ((thread = 0; thread < $THREADS; thread++)); do +for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} # Add remove all other devices and add_device $dev to thread @@ -66,7 +66,7 @@ done # Run if user hits control-c function print_result() { # Print results - for ((thread = 0; thread < $THREADS; thread++)); do + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" |