diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-10-07 00:36:49 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-10-07 00:36:49 +0200 |
commit | c68306ce20ad03ce655a367fc33ad06e12bb87a6 (patch) | |
tree | 73eb2890678d20f32da70d5f373612e03bd6e509 /tools | |
parent | 8657355f5b5f657407efc12a2223e8a3a6d658de (diff) | |
parent | 87095f7ddeff3038a0cf8e6574922f9c11688619 (diff) |
Merge tag 'perf-core-for-mingo-20161005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
- Intel PT timestamp fixes (Adrian Hunter)
- Fix Intel JSON fixed counter conversions (Andi Kleen)
- Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo)
- Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha)
- Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or
equal to the first event (Namhyung Kim)
- Fix uretprobe probe placement on ppc64le (Ravi Bangoria)
- Support building C++ source files and add feature detection for g++,
prep work for supporting a builtin clang/llvm, to remove the need for having
that toolchain installed to automagically build BPF scriptlets that then
get uploaded to the kernel via sys_bpf() (Wang Nan)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/arch/x86/include/asm/cpufeatures.h | 1 | ||||
-rw-r--r-- | tools/arch/x86/lib/memcpy_64.S | 6 | ||||
-rw-r--r-- | tools/build/Build.include | 1 | ||||
-rw-r--r-- | tools/build/Makefile.build | 7 | ||||
-rw-r--r-- | tools/build/Makefile.feature | 2 | ||||
-rw-r--r-- | tools/build/feature/Makefile | 10 | ||||
-rw-r--r-- | tools/build/feature/test-cxx.cpp | 15 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 4 | ||||
-rw-r--r-- | tools/lib/traceevent/kbuffer-parse.c | 1 | ||||
-rw-r--r-- | tools/perf/Documentation/tips.txt | 4 | ||||
-rw-r--r-- | tools/perf/arch/powerpc/util/sym-handling.c | 3 | ||||
-rw-r--r-- | tools/perf/pmu-events/jevents.c | 2 | ||||
-rw-r--r-- | tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 38 |
13 files changed, 85 insertions, 9 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 92a8308b96f6..1188bc849ee3 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -106,7 +106,6 @@ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 2ec0b0abbfaa..49e6ebac7e73 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -181,11 +181,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe - memory copy with machine check exception handling + * memcpy_mcsafe_unrolled - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe) +ENTRY(memcpy_mcsafe_unrolled) cmpl $8, %edx /* Less than 8 bytes? 
Go to byte copy loop */ jb .L_no_whole_words @@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe) +ENDPROC(memcpy_mcsafe_unrolled) .section .fixup, "ax" /* Return -EFAULT for any failure */ diff --git a/tools/build/Build.include b/tools/build/Build.include index 02489380d79b..1dcb95e76f70 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -90,6 +90,7 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ # - per object C flags # - BUILD_STR macro to allow '-D"$(variable)"' constructs c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) +cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) ### ## HOSTCC C flags diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 190519a94ce5..99c0ccd2f176 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -61,6 +61,9 @@ quiet_cmd_cc_o_c = CC $@ quiet_cmd_host_cc_o_c = HOSTCC $@ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $< +quiet_cmd_cxx_o_c = CXX $@ + cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $< + quiet_cmd_cpp_i_c = CPP $@ cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< @@ -88,6 +91,10 @@ $(OUTPUT)%.o: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,$(host)cc_o_c) +$(OUTPUT)%.o: %.cpp FORCE + $(call rule_mkdir) + $(call if_changed_dep,cxx_o_c) + $(OUTPUT)%.o: %.S FORCE $(call rule_mkdir) $(call if_changed_dep,$(host)cc_o_c) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index a120c6b755a9..ae52e029dd22 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -7,7 +7,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) 
$(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index a0b29a311816..ac9c477a2a48 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -46,11 +46,13 @@ FILES= \ test-lzma.bin \ test-bpf.bin \ test-get_cpuid.bin \ - test-sdt.bin + test-sdt.bin \ + test-cxx.bin FILES := $(addprefix $(OUTPUT),$(FILES)) CC := $(CROSS_COMPILE)gcc -MD +CXX := $(CROSS_COMPILE)g++ -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) @@ -58,6 +60,9 @@ all: $(FILES) __BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 +__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) + BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 + ############################### $(OUTPUT)test-all.bin: @@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin: $(OUTPUT)test-sdt.bin: $(BUILD) +$(OUTPUT)test-cxx.bin: + $(BUILDXX) -std=gnu++11 + -include $(OUTPUT)*.d ############################### diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp new file mode 100644 index 000000000000..b1dee9a31d6c --- /dev/null +++ b/tools/build/feature/test-cxx.cpp @@ -0,0 +1,15 @@ +#include <iostream> +#include <memory> + +static void print_str(std::string s) +{ + std::cout << s << std::endl; +} + +int main() +{ + std::string s("Hello World!"); + print_str(std::move(s)); + std::cout << "|" << s << "|" << std::endl; + return 0; +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h 
index da218fec6056..9e5fc168c8a3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -339,7 +339,7 @@ enum bpf_func_id { BPF_FUNC_skb_change_type, /** - * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb * @skb: pointer to skb * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type * @index: index of the cgroup in the bpf_map @@ -348,7 +348,7 @@ enum bpf_func_id { * == 1 skb succeeded the cgroup2 descendant test * < 0 error */ - BPF_FUNC_skb_in_cgroup, + BPF_FUNC_skb_under_cgroup, /** * bpf_get_hash_recalc(skb) diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index 3bcada3ae05a..65984f1c2974 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -622,6 +622,7 @@ void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, /* Reset the buffer */ kbuffer_load_subbuffer(kbuf, kbuf->subbuffer); + data = kbuffer_read_event(kbuf, ts); while (kbuf->curr < offset) { data = kbuffer_next_event(kbuf, ts); diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 5950b5a24efd..8a6479c0eac9 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -28,3 +28,7 @@ To change sampling frequency to 100 Hz: perf record -F 100 See assembly instructions with percentage: perf annotate <symbol> If you prefer Intel style assembly, try: perf annotate -M intel For hierarchical output, try: perf report --hierarchy +Order by the overhead of source file name and line number: perf report -s srcline +System-wide collection from all CPUs: perf record -a +Show current config key-value pairs: perf config --list +Show user configuration overrides: perf config --user --list diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index ed9d5d15d5b6..1030a6e504bb 100644 --- 
a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, * * In addition, we shouldn't specify an offset for kretprobes. */ - if (pev->point.offset || pev->point.retprobe || !map || !sym) + if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) || + !map || !sym) return; lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 79c2133bc534..41611d7f9873 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -312,6 +312,8 @@ static struct fixed { const char *event; } fixed[] = { { "inst_retired.any", "event=0xc0" }, + { "inst_retired.any_p", "event=0xc0" }, + { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, { "cpu_clk_unhalted.thread", "event=0x3c" }, { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, { NULL, NULL}, diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 7591a0c37473..16c06d3ae577 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -90,6 +90,7 @@ struct intel_pt_decoder { bool pge; bool have_tma; bool have_cyc; + bool fixup_last_mtc; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -586,10 +587,31 @@ struct intel_pt_calc_cyc_to_tsc_info { uint64_t tsc_timestamp; uint64_t timestamp; bool have_tma; + bool fixup_last_mtc; bool from_mtc; double cbr_cyc_to_tsc; }; +/* + * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower + * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC + * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA + * packet by copying the missing bits from the current MTC assuming the least + * difference between the two, and that the current MTC comes after last_mtc. 
+ */ +static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift, + uint32_t *last_mtc) +{ + uint32_t first_missing_bit = 1U << (16 - mtc_shift); + uint32_t mask = ~(first_missing_bit - 1); + + *last_mtc |= mtc & mask; + if (*last_mtc >= mtc) { + *last_mtc -= first_missing_bit; + *last_mtc &= 0xff; + } +} + static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) { struct intel_pt_decoder *decoder = pkt_info->decoder; @@ -619,6 +641,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) return 0; mtc = pkt_info->packet.payload; + if (decoder->mtc_shift > 8 && data->fixup_last_mtc) { + data->fixup_last_mtc = false; + intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift, + &data->last_mtc); + } if (mtc > data->last_mtc) mtc_delta = mtc - data->last_mtc; else @@ -687,6 +714,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) data->ctc_delta = 0; data->have_tma = true; + data->fixup_last_mtc = true; return 0; @@ -753,6 +781,7 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, .tsc_timestamp = decoder->tsc_timestamp, .timestamp = decoder->timestamp, .have_tma = decoder->have_tma, + .fixup_last_mtc = decoder->fixup_last_mtc, .from_mtc = from_mtc, .cbr_cyc_to_tsc = 0, }; @@ -1271,6 +1300,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) } decoder->ctc_delta = 0; decoder->have_tma = true; + decoder->fixup_last_mtc = true; intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); } @@ -1285,6 +1315,12 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) mtc = decoder->packet.payload; + if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) { + decoder->fixup_last_mtc = false; + intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift, + &decoder->last_mtc); + } + if (mtc > decoder->last_mtc) mtc_delta = mtc - decoder->last_mtc; else @@ -1353,6 +1389,8 @@ static void intel_pt_calc_cyc_timestamp(struct 
intel_pt_decoder *decoder) timestamp, decoder->timestamp); else decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; } /* Walk PSB+ packets when already in sync. */ |