summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-10-07 00:36:49 +0200
committerIngo Molnar <mingo@kernel.org>2016-10-07 00:36:49 +0200
commitc68306ce20ad03ce655a367fc33ad06e12bb87a6 (patch)
tree73eb2890678d20f32da70d5f373612e03bd6e509 /tools
parent8657355f5b5f657407efc12a2223e8a3a6d658de (diff)
parent87095f7ddeff3038a0cf8e6574922f9c11688619 (diff)
Merge tag 'perf-core-for-mingo-20161005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Intel PT timestamp fixes (Adrian Hunter) - Fix Intel JSON fixed counter conversions (Andi Kleen) - Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo) - Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha) - Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or equal to the first event (Namhyung Kim) - Fix uretprobe probe placement on ppc64le (Ravi Bangoria) - Support building C++ source files and add feature detection for g++, prep work for supporting a builtin clang/llvm, to remove the need for having that toolchain installed to automagically build BPF scriptlets that then get uploaded to the kernel via sys_bpf() (Wang Nan) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/arch/x86/lib/memcpy_64.S6
-rw-r--r--tools/build/Build.include1
-rw-r--r--tools/build/Makefile.build7
-rw-r--r--tools/build/Makefile.feature2
-rw-r--r--tools/build/feature/Makefile10
-rw-r--r--tools/build/feature/test-cxx.cpp15
-rw-r--r--tools/include/uapi/linux/bpf.h4
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c1
-rw-r--r--tools/perf/Documentation/tips.txt4
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c3
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c38
13 files changed, 85 insertions, 9 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308b96f6..1188bc849ee3 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -106,7 +106,6 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 2ec0b0abbfaa..49e6ebac7e73 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
- * memcpy_mcsafe - memory copy with machine check exception handling
+ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe)
+ENTRY(memcpy_mcsafe_unrolled)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe)
+ENDPROC(memcpy_mcsafe_unrolled)
.section .fixup, "ax"
/* Return -EFAULT for any failure */
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 02489380d79b..1dcb95e76f70 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -90,6 +90,7 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
# - per object C flags
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
###
## HOSTCC C flags
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 190519a94ce5..99c0ccd2f176 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -61,6 +61,9 @@ quiet_cmd_cc_o_c = CC $@
quiet_cmd_host_cc_o_c = HOSTCC $@
cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
+quiet_cmd_cxx_o_c = CXX $@
+ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
+
quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
@@ -88,6 +91,10 @@ $(OUTPUT)%.o: %.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,$(host)cc_o_c)
+$(OUTPUT)%.o: %.cpp FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cxx_o_c)
+
$(OUTPUT)%.o: %.S FORCE
$(call rule_mkdir)
$(call if_changed_dep,$(host)cc_o_c)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index a120c6b755a9..ae52e029dd22 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -7,7 +7,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index a0b29a311816..ac9c477a2a48 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -46,11 +46,13 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-get_cpuid.bin \
- test-sdt.bin
+ test-sdt.bin \
+ test-cxx.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
CC := $(CROSS_COMPILE)gcc -MD
+CXX := $(CROSS_COMPILE)g++ -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
all: $(FILES)
@@ -58,6 +60,9 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
+__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
+ BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
+
###############################
$(OUTPUT)test-all.bin:
@@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin:
$(OUTPUT)test-sdt.bin:
$(BUILD)
+$(OUTPUT)test-cxx.bin:
+ $(BUILDXX) -std=gnu++11
+
-include $(OUTPUT)*.d
###############################
diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp
new file mode 100644
index 000000000000..b1dee9a31d6c
--- /dev/null
+++ b/tools/build/feature/test-cxx.cpp
@@ -0,0 +1,15 @@
+#include <iostream>
+#include <memory>
+
+static void print_str(std::string s) // takes the string by value, so an argument passed via std::move() is move-constructed into s
+{
+ std::cout << s << std::endl;
+}
+
+int main() // feature probe: only needs to compile and link with the C++ toolchain under test
+{
+ std::string s("Hello World!");
+ print_str(std::move(s)); // std::move requires C++11 support from the compiler and library
+ std::cout << "|" << s << "|" << std::endl; // s has been moved from; it is still valid to print but its contents are unspecified
+ return 0;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index da218fec6056..9e5fc168c8a3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -339,7 +339,7 @@ enum bpf_func_id {
BPF_FUNC_skb_change_type,
/**
- * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+ * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
@@ -348,7 +348,7 @@ enum bpf_func_id {
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
- BPF_FUNC_skb_in_cgroup,
+ BPF_FUNC_skb_under_cgroup,
/**
* bpf_get_hash_recalc(skb)
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index 3bcada3ae05a..65984f1c2974 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -622,6 +622,7 @@ void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
/* Reset the buffer */
kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
+ data = kbuffer_read_event(kbuf, ts);
while (kbuf->curr < offset) {
data = kbuffer_next_event(kbuf, ts);
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index 5950b5a24efd..8a6479c0eac9 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -28,3 +28,7 @@ To change sampling frequency to 100 Hz: perf record -F 100
See assembly instructions with percentage: perf annotate <symbol>
If you prefer Intel style assembly, try: perf annotate -M intel
For hierarchical output, try: perf report --hierarchy
+Order by the overhead of source file name and line number: perf report -s srcline
+System-wide collection from all CPUs: perf record -a
+Show current config key-value pairs: perf config --list
+Show user configuration overrides: perf config --user --list
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index ed9d5d15d5b6..1030a6e504bb 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
- if (pev->point.offset || pev->point.retprobe || !map || !sym)
+ if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
+ !map || !sym)
return;
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 79c2133bc534..41611d7f9873 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -312,6 +312,8 @@ static struct fixed {
const char *event;
} fixed[] = {
{ "inst_retired.any", "event=0xc0" },
+ { "inst_retired.any_p", "event=0xc0" },
+ { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
{ "cpu_clk_unhalted.thread", "event=0x3c" },
{ "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
{ NULL, NULL},
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7591a0c37473..16c06d3ae577 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -90,6 +90,7 @@ struct intel_pt_decoder {
bool pge;
bool have_tma;
bool have_cyc;
+ bool fixup_last_mtc;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
@@ -586,10 +587,31 @@ struct intel_pt_calc_cyc_to_tsc_info {
uint64_t tsc_timestamp;
uint64_t timestamp;
bool have_tma;
+ bool fixup_last_mtc;
bool from_mtc;
double cbr_cyc_to_tsc;
};
+/*
+ * MTC provides an 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
+ *
+ * @mtc:       payload of the current MTC packet
+ * @mtc_shift: shift used to derive the bit position of the first MTC bit that
+ *             the TMA packet's 16 CTC bits do not cover (16 - mtc_shift)
+ * @last_mtc:  in/out; the last_mtc value derived from the TMA packet, updated
+ *             in place with the missing upper bits filled in
+ *
+ * The missing upper bits are first copied from @mtc. If that leaves *last_mtc
+ * at or after @mtc, one unit of the lowest missing bit is subtracted and the
+ * result is truncated to 8 bits, so that *last_mtc ends up before @mtc modulo
+ * 256.
+ *
+ * NOTE(review): the shift below is only well defined for mtc_shift <= 16. The
+ * callers visible here check mtc_shift > 8 only - confirm the upper bound is
+ * guaranteed elsewhere.
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+ uint32_t *last_mtc)
+{
+ uint32_t first_missing_bit = 1U << (16 - mtc_shift); /* lowest MTC bit not supplied by TMA */
+ uint32_t mask = ~(first_missing_bit - 1); /* first_missing_bit and every bit above it */
+
+ *last_mtc |= mtc & mask; /* take the missing upper bits from the current MTC */
+ if (*last_mtc >= mtc) {
+ *last_mtc -= first_missing_bit; /* step back so last_mtc precedes mtc */
+ *last_mtc &= 0xff; /* keep the 8-bit MTC range after a possible wrap */
+ }
+}
+
static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
{
struct intel_pt_decoder *decoder = pkt_info->decoder;
@@ -619,6 +641,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
return 0;
mtc = pkt_info->packet.payload;
+ if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+ data->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &data->last_mtc);
+ }
if (mtc > data->last_mtc)
mtc_delta = mtc - data->last_mtc;
else
@@ -687,6 +714,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
data->ctc_delta = 0;
data->have_tma = true;
+ data->fixup_last_mtc = true;
return 0;
@@ -753,6 +781,7 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
.tsc_timestamp = decoder->tsc_timestamp,
.timestamp = decoder->timestamp,
.have_tma = decoder->have_tma,
+ .fixup_last_mtc = decoder->fixup_last_mtc,
.from_mtc = from_mtc,
.cbr_cyc_to_tsc = 0,
};
@@ -1271,6 +1300,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
}
decoder->ctc_delta = 0;
decoder->have_tma = true;
+ decoder->fixup_last_mtc = true;
intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
}
@@ -1285,6 +1315,12 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
mtc = decoder->packet.payload;
+ if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+ decoder->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &decoder->last_mtc);
+ }
+
if (mtc > decoder->last_mtc)
mtc_delta = mtc - decoder->last_mtc;
else
@@ -1353,6 +1389,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
timestamp, decoder->timestamp);
else
decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
}
/* Walk PSB+ packets when already in sync. */