diff options
Diffstat (limited to 'tools')
26 files changed, 1465 insertions, 96 deletions
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index 00ad2d608727..aa5e092c4352 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -66,6 +66,7 @@ To follow the above example, the user provides following 'Build' files: ex/Build: ex-y += a.o ex-y += b.o + ex-y += b.o # duplicates in the lists are allowed libex-y += c.o libex-y += d.o diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 8120af9c0341..0c5f485521d6 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -63,7 +63,7 @@ quiet_cmd_gen = GEN $@ # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ - $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) # Build rules $(OUTPUT)%.o: %.c FORCE diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build index 70d876237c57..429c7d452101 100644 --- a/tools/build/tests/ex/Build +++ b/tools/build/tests/ex/Build @@ -1,6 +1,7 @@ ex-y += ex.o ex-y += a.o ex-y += b.o +ex-y += b.o ex-y += empty/ ex-y += empty2/ diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 2866b62eb293..4a0501d7a3b4 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -142,19 +142,21 @@ which is the same as -e intel_pt/tsc=1,noretcomp=0/ +Note there are now new config terms - see section 'config terms' further below. + The config terms are listed in /sys/devices/intel_pt/format. They are bit fields within the config member of the struct perf_event_attr which is passed to the kernel by the perf_event_open system call. They correspond to bit fields in the IA32_RTIT_CTL MSR. 
Here is a list of them and their definitions: - $ for f in `ls /sys/devices/intel_pt/format`;do - > echo $f - > cat /sys/devices/intel_pt/format/$f - > done - noretcomp - config:11 - tsc - config:10 + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 Note that the default config must be overridden for each term i.e. @@ -209,9 +211,185 @@ perf_event_attr is displayed if the -vv option is used e.g. ------------------------------------------------------------ +config terms +------------ + +The June 2015 version of Intel 64 and IA-32 Architectures Software Developer +Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. +Some of the features are reflected in new config terms. All the config terms are +described below. + +tsc Always supported. Produces TSC timestamp packets to provide + timing information. In some cases it is possible to decode + without timing information, for example a per-thread context + that does not overlap executable memory maps. + + The default config selects tsc (i.e. tsc=1). + +noretcomp Always supported. Disables "return compression" so a TIP packet + is produced when a function returns. Causes more packets to be + produced but might make decoding more reliable. + + The default config does not select noretcomp (i.e. noretcomp=0). + +psb_period Allows the frequency of PSB packets to be specified. + + The PSB packet is a synchronization packet that provides a + starting point for decoding or recovery from errors.
+ + Support for psb_period is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and "0" + otherwise. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The psb_period value is converted to the approximate number of + trace bytes between PSB packets as: + + 2 ^ (value + 11) + + e.g. value 3 means 16KiB between PSBs + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/psb_period=15/u uname + Invalid psb_period for intel_pt. Valid values are: 0-5 + + If MTC packets are selected, the default config selects a value + of 3 (i.e. psb_period=3) or the nearest lower value that is + supported (0 is always supported). Otherwise the default is 0. + + If decoding is expected to be reliable and the buffer is large + then a large PSB period can be used. + + Because a TSC packet is produced with PSB, the PSB period can + also affect the granularity of timing information in the absence + of MTC or CYC. + +mtc Produces MTC timing packets. + + MTC packets provide finer grain timestamp information than TSC + packets. MTC packets record time using the hardware crystal + clock (CTC) which is related to TSC packets using a TMA packet. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc + + which contains "1" if the feature is supported and + "0" otherwise. + + The frequency of MTC packets can also be specified - see + mtc_period below. + +mtc_period Specifies how frequently MTC packets are produced - see mtc + above for how to determine if MTC packets are supported.
+ + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. 
Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + +no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR. + + It stops the driver resetting the byte count to zero whenever + enabling the trace (for example on context switches) which in + turn results in no PSB being forced. However some processors + will produce a PSB anyway. + + In any case, there is still a PSB when the trace is enabled for + the first time. + + no_force_psb can be used to slightly decrease the trace size but + may make it harder for the decoder to recover from errors. + + no_force_psb is not selected by default. + + new snapshot option ------------------- +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. 
+ To select snapshot mode a new option has been added: -S diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f31f15a5f873..af009bd6e6b7 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -41,7 +41,6 @@ tools/include/asm-generic/bitops.h tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h -tools/include/linux/export.h tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index da7d2c15e611..2ca10d796c0b 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -18,6 +18,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/log2.h> +#include <cpuid.h> #include "../../perf.h" #include "../../util/session.h" @@ -99,17 +100,137 @@ static int intel_pt_parse_terms(struct list_head *formats, const char *str, return intel_pt_parse_terms_with_default(formats, str, config); } -static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static u64 intel_pt_masked_bits(u64 mask, u64 bits) { - return 256; + const u64 top_bit = 1ULL << 63; + u64 res = 0; + int i; + + for (i = 0; i < 64; i++) { + if (mask & top_bit) { + res <<= 1; + if (bits & top_bit) + res |= 1; + } + mask <<= 1; + bits <<= 1; + } + + return res; +} + +static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, + struct perf_evlist *evlist, u64 *res) +{ + struct perf_evsel *evsel; + u64 mask; + + *res = 0; + + mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); + if (!mask) + return -EINVAL; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + *res = intel_pt_masked_bits(mask, evsel->attr.config); + return 0; + } + } + + return -EINVAL; +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, + struct perf_evlist *evlist) +{ + u64 val; + int err, 
topa_multiple_entries; + size_t psb_period; + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", + "%d", &topa_multiple_entries) != 1) + topa_multiple_entries = 0; + + /* + * Use caps/topa_multiple_entries to indicate early hardware that had + * extra frequent PSBs. + */ + if (!topa_multiple_entries) { + psb_period = 256; + goto out; + } + + err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); + if (err) + val = 0; + + psb_period = 1 << (val + 11); +out: + pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); + return psb_period; +} + +static int intel_pt_pick_bit(int bits, int target) +{ + int pos, pick = -1; + + for (pos = 0; bits; bits >>= 1, pos++) { + if (bits & 1) { + if (pos <= target || pick < 0) + pick = pos; + if (pos >= target) + break; + } + } + + return pick; } static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) { + char buf[256]; + int mtc, mtc_periods = 0, mtc_period; + int psb_cyc, psb_periods, psb_period; + int pos = 0; u64 config; - intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config); + pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", + &mtc) != 1) + mtc = 1; + + if (mtc) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", + &mtc_periods) != 1) + mtc_periods = 0; + if (mtc_periods) { + mtc_period = intel_pt_pick_bit(mtc_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",mtc,mtc_period=%d", mtc_period); + } + } + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", + &psb_cyc) != 1) + psb_cyc = 1; + + if (psb_cyc && mtc_periods) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", + &psb_periods) != 1) + psb_periods = 0; + if (psb_periods) { + psb_period = intel_pt_pick_bit(psb_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",psb_period=%d", psb_period); + } + } + + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); + + 
intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); + return config; } @@ -157,6 +278,15 @@ static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused return INTEL_PT_AUXTRACE_PRIV_SIZE; } +static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + *n = ebx; + *d = eax; +} + static int intel_pt_info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -168,7 +298,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, struct perf_event_mmap_page *pc; struct perf_tsc_conversion tc = { .time_mult = 0, }; bool cap_user_time_zero = false, per_cpu_mmaps; - u64 tsc_bit, noretcomp_bit; + u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; + u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; int err; if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) @@ -177,6 +308,12 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", &noretcomp_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); + mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, + "mtc_period"); + intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); + + intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); if (!session->evlist->nr_mmaps) return -EINVAL; @@ -207,6 +344,11 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; + auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; + auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; + auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; + 
auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; return 0; } @@ -239,6 +381,117 @@ static int intel_pt_track_switches(struct perf_evlist *evlist) return 0; } +static void intel_pt_valid_str(char *str, size_t len, u64 valid) +{ + unsigned int val, last = 0, state = 1; + int p = 0; + + str[0] = '\0'; + + for (val = 0; val <= 64; val++, valid >>= 1) { + if (valid & 1) { + last = val; + switch (state) { + case 0: + p += scnprintf(str + p, len - p, ","); + /* Fall through */ + case 1: + p += scnprintf(str + p, len - p, "%u", val); + state = 2; + break; + case 2: + state = 3; + break; + case 3: + state = 4; + break; + default: + break; + } + } else { + switch (state) { + case 3: + p += scnprintf(str + p, len - p, ",%u", last); + state = 0; + break; + case 4: + p += scnprintf(str + p, len - p, "-%u", last); + state = 0; + break; + default: + break; + } + if (state != 1) + state = 0; + } + } +} + +static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, + const char *caps, const char *name, + const char *supported, u64 config) +{ + char valid_str[256]; + unsigned int shift; + unsigned long long valid; + u64 bits; + int ok; + + if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) + valid = 0; + + if (supported && + perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) + valid = 0; + + valid |= 1; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); + + config &= bits; + + for (shift = 0; bits && !(bits & 1); shift++) + bits >>= 1; + + config >>= shift; + + if (config > 63) + goto out_err; + + if (valid & (1ULL << config)) /* config may be up to 63: shift in 64 bits, an int shift would be undefined */ + return 0; +out_err: + intel_pt_valid_str(valid_str, sizeof(valid_str), valid); + pr_err("Invalid %s for %s.
Valid values are: %s\n", + name, INTEL_PT_PMU_NAME, valid_str); + return -EINVAL; +} + +static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, + struct perf_evsel *evsel) +{ + int err; + + if (!evsel) + return 0; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", + "cyc_thresh", "caps/psb_cyc", + evsel->attr.config); + if (err) + return err; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", + "mtc_period", "caps/mtc", + evsel->attr.config); + if (err) + return err; + + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", + "psb_period", "caps/psb_cyc", + evsel->attr.config); +} + static int intel_pt_recording_options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts) @@ -251,6 +504,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, const struct cpu_map *cpus = evlist->cpus; bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; u64 tsc_bit; + int err; ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; @@ -281,6 +535,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); + if (err) + return err; + /* Set default sizes for snapshot mode */ if (opts->auxtrace_snapshot_mode) { size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); @@ -366,8 +624,6 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * threads.
*/ if (have_timing_info && !cpu_map__empty(cpus)) { - int err; - err = intel_pt_track_switches(evlist); if (err == -EPERM) pr_debug2("Unable to select sched:sched_switch\n"); @@ -394,7 +650,6 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { struct perf_evsel *tracking_evsel; - int err; err = parse_events(evlist, "dummy:u", NULL); if (err) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a32a64ef08e2..8edc205ff9a7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, rb_erase(&al->sym->rb_node, &al->map->dso->symbols[al->map->type]); symbol__delete(al->sym); + dso__reset_find_symbol_cache(al->map->dso); } return 0; } diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c61d14b101e0..c4b99008e2c9 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p) pthread_mutex_unlock(&ui__lock); } +static void tui_progress__finish(void) +{ + int y; + + if (use_browser <= 0) + return; + + ui__refresh_dimensions(false); + pthread_mutex_lock(&ui__lock); + y = SLtt_Screen_Rows / 2 - 2; + SLsmg_set_color(0); + SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' '); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); +} + static struct ui_progress_ops tui_progress__ops = { - .update = tui_progress__update, + .update = tui_progress__update, + .finish = tui_progress__finish, }; void tui_progress__init(void) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c73276db6d6f..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -324,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head 
*head, bool with_hits); +void dso__reset_find_symbol_cache(struct dso *dso); + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c index feeaa509dfe4..906d94aa0a24 100644 --- a/tools/perf/util/intel-pt-decoder/inat.c +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -18,7 +18,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#include <asm/insn.h> +#include "insn.h" /* Attribute tables are generated from opcode map */ #include "inat-tables.c" diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h index 74a2e312e8a2..611645e903a8 100644 --- a/tools/perf/util/intel-pt-decoder/inat.h +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -20,7 +20,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#include <asm/inat_types.h> +#include "inat_types.h" /* * Internal bits. Don't use bitmasks directly, because these bits are diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c index 8f72b334aea0..47314a64399c 100644 --- a/tools/perf/util/intel-pt-decoder/insn.c +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -23,8 +23,8 @@ #else #include <string.h> #endif -#include <asm/inat.h> -#include <asm/insn.h> +#include "inat.h" +#include "insn.h" /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h index e7814b74caf8..dd12da0f4593 100644 --- a/tools/perf/util/intel-pt-decoder/insn.h +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -21,7 +21,7 @@ */ /* insn_attr_t is defined in inat.h */ -#include <asm/inat.h> +#include "inat.h" struct insn_field { union { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f8ac462fec1a..22ba50224319 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -85,7 +85,10 @@ struct intel_pt_decoder { const unsigned char *buf; size_t len; bool return_compression; + bool mtc_insn; bool pge; + bool have_tma; + bool have_cyc; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -94,20 +97,37 @@ struct intel_pt_decoder { uint64_t 
tsc_timestamp; uint64_t ref_timestamp; uint64_t ret_addr; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t cycle_cnt; + uint64_t cyc_ref_timestamp; + uint32_t last_mtc; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; + uint32_t tsc_ctc_mult; + uint32_t tsc_slip; + uint32_t ctc_rem_mask; + int mtc_shift; struct intel_pt_stack stack; enum intel_pt_pkt_state pkt_state; struct intel_pt_pkt packet; struct intel_pt_pkt tnt; int pkt_step; int pkt_len; + int last_packet_type; unsigned int cbr; unsigned int max_non_turbo_ratio; + double max_non_turbo_ratio_fp; + double cbr_cyc_to_tsc; + double calc_cyc_to_tsc; + bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; uint64_t sign_bit; uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; + uint64_t tot_insn_cnt; uint64_t period_insn_cnt; uint64_t period_mask; uint64_t period_ticks; @@ -148,6 +168,13 @@ static void intel_pt_setup_period(struct intel_pt_decoder *decoder) } } +static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d) +{ + if (!d) + return 0; + return (t / d) * n + ((t % d) * n) / d; +} + struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) { struct intel_pt_decoder *decoder; @@ -170,10 +197,44 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->period = params->period; decoder->period_type = params->period_type; - decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; intel_pt_setup_period(decoder); + decoder->mtc_shift = params->mtc_period; + decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1; + + decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n; + decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d; + + if (!decoder->tsc_ctc_ratio_n) + decoder->tsc_ctc_ratio_d = 0; + + if (decoder->tsc_ctc_ratio_d) { + if (!(decoder->tsc_ctc_ratio_n % 
decoder->tsc_ctc_ratio_d)) + decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / + decoder->tsc_ctc_ratio_d; + + /* + * Allow for timestamps appearing to backwards because a TSC + * packet has slipped past a MTC packet, so allow 2 MTC ticks + * or ... + */ + decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + /* ... or 0x100 paranoia */ + if (decoder->tsc_slip < 0x100) + decoder->tsc_slip = 0x100; + + intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); + intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); + intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); + intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); + intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + return decoder; } @@ -367,6 +428,7 @@ static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) { intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; decoder->pkt_len = 1; decoder->pkt_step = 1; intel_pt_decoder_log_packet(decoder); @@ -399,6 +461,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_NO_PSB; decoder->ref_timestamp = buffer.ref_timestamp; decoder->timestamp = 0; + decoder->have_tma = false; decoder->state.trace_nr = buffer.trace_nr; intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", decoder->ref_timestamp); @@ -460,10 +523,247 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) return ret; } +struct intel_pt_pkt_info { + struct intel_pt_decoder *decoder; + struct intel_pt_pkt packet; + uint64_t pos; + int pkt_len; + int last_packet_type; + void *data; +}; + +typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); + +/* Lookahead packets in current buffer */ +static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, + intel_pt_pkt_cb_t cb, void *data) +{ + 
struct intel_pt_pkt_info pkt_info; + const unsigned char *buf = decoder->buf; + size_t len = decoder->len; + int ret; + + pkt_info.decoder = decoder; + pkt_info.pos = decoder->pos; + pkt_info.pkt_len = decoder->pkt_step; + pkt_info.last_packet_type = decoder->last_packet_type; + pkt_info.data = data; + + while (1) { + do { + pkt_info.pos += pkt_info.pkt_len; + buf += pkt_info.pkt_len; + len -= pkt_info.pkt_len; + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + if (!ret) + return INTEL_PT_NEED_MORE_BYTES; + if (ret < 0) + return ret; + + pkt_info.pkt_len = ret; + } while (pkt_info.packet.type == INTEL_PT_PAD); + + ret = cb(&pkt_info); + if (ret) + return 0; + + pkt_info.last_packet_type = pkt_info.packet.type; + } +} + +struct intel_pt_calc_cyc_to_tsc_info { + uint64_t cycle_cnt; + unsigned int cbr; + uint32_t last_mtc; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t tsc_timestamp; + uint64_t timestamp; + bool have_tma; + bool from_mtc; + double cbr_cyc_to_tsc; +}; + +static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_decoder *decoder = pkt_info->decoder; + struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data; + uint64_t timestamp; + double cyc_to_tsc; + unsigned int cbr; + uint32_t mtc, mtc_delta, ctc, fc, ctc_rem; + + switch (pkt_info->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + case INTEL_PT_PSB: + case INTEL_PT_PIP: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + return 0; + + case INTEL_PT_MTC: + if (!data->have_tma) + return 0; + + mtc = pkt_info->packet.payload; + if (mtc > data->last_mtc) + mtc_delta = mtc - data->last_mtc; + else + mtc_delta = mtc + 256 - data->last_mtc; + data->ctc_delta += mtc_delta << decoder->mtc_shift; + data->last_mtc = mtc; + + if (decoder->tsc_ctc_mult) { + 
timestamp = data->ctc_timestamp + + data->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = data->ctc_timestamp + + multdiv(data->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < data->timestamp) + return 1; + + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + data->timestamp = timestamp; + return 0; + } + + break; + + case INTEL_PT_TSC: + timestamp = pkt_info->packet.payload | + (data->timestamp & (0xffULL << 56)); + if (data->from_mtc && timestamp < data->timestamp && + data->timestamp - timestamp < decoder->tsc_slip) + return 1; + while (timestamp < data->timestamp) + timestamp += (1ULL << 56); + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + if (data->from_mtc) + return 1; + data->tsc_timestamp = timestamp; + data->timestamp = timestamp; + return 0; + } + break; + + case INTEL_PT_TMA: + if (data->from_mtc) + return 1; + + if (!decoder->tsc_ctc_ratio_d) + return 0; + + ctc = pkt_info->packet.payload; + fc = pkt_info->packet.count; + ctc_rem = ctc & decoder->ctc_rem_mask; + + data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + + data->ctc_timestamp = data->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + data->ctc_timestamp -= + multdiv(ctc_rem, decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + data->ctc_delta = 0; + data->have_tma = true; + + return 0; + + case INTEL_PT_CYC: + data->cycle_cnt += pkt_info->packet.payload; + return 0; + + case INTEL_PT_CBR: + cbr = pkt_info->packet.payload; + if (data->cbr && data->cbr != cbr) + return 1; + data->cbr = cbr; + data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TRACESTOP: + case INTEL_PT_OVF: + case INTEL_PT_BAD: /* Does not happen */ + default: + return 1; + } + + if (!data->cbr && decoder->cbr) { + data->cbr = decoder->cbr; + data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc; + } + + if (!data->cycle_cnt) 
+ return 1; + + cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; + + if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc && + cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + return 1; + } + + decoder->calc_cyc_to_tsc = cyc_to_tsc; + decoder->have_calc_cyc_to_tsc = true; + + if (data->cbr) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + } else { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n", + cyc_to_tsc, pkt_info->pos); + } + + return 1; +} + +static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, + bool from_mtc) +{ + struct intel_pt_calc_cyc_to_tsc_info data = { + .cycle_cnt = 0, + .cbr = 0, + .last_mtc = decoder->last_mtc, + .ctc_timestamp = decoder->ctc_timestamp, + .ctc_delta = decoder->ctc_delta, + .tsc_timestamp = decoder->tsc_timestamp, + .timestamp = decoder->timestamp, + .have_tma = decoder->have_tma, + .from_mtc = from_mtc, + .cbr_cyc_to_tsc = 0, + }; + + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); +} + static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) { int ret; + decoder->last_packet_type = decoder->packet.type; + do { decoder->pos += decoder->pkt_step; decoder->buf += decoder->pkt_step; @@ -522,6 +822,7 @@ static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) case INTEL_PT_PERIOD_TICKS: return intel_pt_next_period(decoder); case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: default: return 0; } @@ -541,6 +842,7 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) decoder->last_masked_timestamp = masked_timestamp; break; case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: default: break; } @@ 
-554,11 +856,15 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, uint64_t max_insn_cnt, insn_cnt = 0; int err; + if (!decoder->mtc_insn) + decoder->mtc_insn = true; + max_insn_cnt = intel_pt_next_sample(decoder); err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, max_insn_cnt, decoder->data); + decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; @@ -859,6 +1165,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t timestamp; + decoder->have_tma = false; + if (decoder->ref_timestamp) { timestamp = decoder->packet.payload | (decoder->ref_timestamp & (0xffULL << 56)); @@ -876,21 +1184,29 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) } else if (decoder->timestamp) { timestamp = decoder->packet.payload | (decoder->timestamp & (0xffULL << 56)); + decoder->tsc_timestamp = timestamp; if (timestamp < decoder->timestamp && - decoder->timestamp - timestamp < 0x100) { - intel_pt_log_to("ERROR: Suppressing backwards timestamp", + decoder->timestamp - timestamp < decoder->tsc_slip) { + intel_pt_log_to("Suppressing backwards timestamp", timestamp); timestamp = decoder->timestamp; } while (timestamp < decoder->timestamp) { intel_pt_log_to("Wraparound timestamp", timestamp); timestamp += (1ULL << 56); + decoder->tsc_timestamp = timestamp; } - decoder->tsc_timestamp = timestamp; decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; } + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, false); + } + intel_pt_log_to("Setting timestamp", decoder->timestamp); } @@ -898,11 +1214,117 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + 
decoder->cbr = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; return -EOVERFLOW; } +static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) +{ + uint32_t ctc = decoder->packet.payload; + uint32_t fc = decoder->packet.count; + uint32_t ctc_rem = ctc & decoder->ctc_rem_mask; + + if (!decoder->tsc_ctc_ratio_d) + return; + + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + decoder->ctc_timestamp = decoder->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + decoder->ctc_timestamp -= multdiv(ctc_rem, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + decoder->ctc_delta = 0; + decoder->have_tma = true; + intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", + decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); +} + +static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + uint32_t mtc, mtc_delta; + + if (!decoder->have_tma) + return; + + mtc = decoder->packet.payload; + + if (mtc > decoder->last_mtc) + mtc_delta = mtc - decoder->last_mtc; + else + mtc_delta = mtc + 256 - decoder->last_mtc; + + decoder->ctc_delta += mtc_delta << decoder->mtc_shift; + + if (decoder->tsc_ctc_mult) { + timestamp = decoder->ctc_timestamp + + decoder->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = decoder->ctc_timestamp + + multdiv(decoder->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; + decoder->last_mtc = mtc; + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, true); 
+ } +} + +static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) +{ + unsigned int cbr = decoder->packet.payload; + + if (decoder->cbr == cbr) + return; + + decoder->cbr = cbr; + decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; +} + +static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp = decoder->cyc_ref_timestamp; + + decoder->have_cyc = true; + + decoder->cycle_cnt += decoder->packet.payload; + + if (!decoder->cyc_ref_timestamp) + return; + + if (decoder->have_calc_cyc_to_tsc) + timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc; + else if (decoder->cbr) + timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc; + else + return; + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; +} + /* Walk PSB+ packets when already in sync. */ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) { @@ -921,8 +1343,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: case INTEL_PT_TNT: + case INTEL_PT_TRACESTOP: case INTEL_PT_BAD: case INTEL_PT_PSB: + decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); return -EAGAIN; @@ -933,8 +1357,12 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_MODE_EXEC: @@ -942,7 +1370,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; case INTEL_PT_FUP: @@ -954,6 +1382,15 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) 
intel_pt_update_in_tx(decoder); break; + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: default: break; @@ -981,8 +1418,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TNT: case INTEL_PT_FUP: + case INTEL_PT_TRACESTOP: case INTEL_PT_PSB: case INTEL_PT_TSC: + case INTEL_PT_TMA: case INTEL_PT_CBR: case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: @@ -1030,13 +1469,25 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) return 0; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); break; case INTEL_PT_MODE_EXEC: decoder->exec_mode = decoder->packet.payload; break; + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: break; @@ -1120,6 +1571,13 @@ next: } return intel_pt_walk_fup_tip(decoder); + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + case INTEL_PT_PSB: intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); @@ -1130,15 +1588,43 @@ next: break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type != INTEL_PT_PERIOD_MTC) + break; + /* + * Ensure that there has been an instruction since the + * last MTC. 
+ */ + if (!decoder->mtc_insn) + break; + decoder->mtc_insn = false; + /* Ensure that there is a timestamp */ + if (!decoder->timestamp) + break; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->mtc_insn = false; + return 0; + case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_MODE_EXEC: @@ -1160,6 +1646,8 @@ next: return intel_pt_bug(decoder); case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: break; @@ -1200,16 +1688,28 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) } break; + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; case INTEL_PT_MODE_EXEC: @@ -1220,7 +1720,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) intel_pt_update_in_tx(decoder); break; + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); case INTEL_PT_TNT: + decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); if (decoder->ip) decoder->pkt_state = INTEL_PT_STATE_ERR4; @@ -1238,6 +1743,8 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) return 0; case INTEL_PT_PSB: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: default: break; @@ -1280,16 +1787,28 
@@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) intel_pt_set_last_ip(decoder); break; + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; case INTEL_PT_MODE_EXEC: @@ -1306,6 +1825,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_BAD: /* Does not happen */ return intel_pt_bug(decoder); + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; + break; + case INTEL_PT_PSB: err = intel_pt_walk_psb(decoder); if (err) @@ -1319,6 +1845,8 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TNT: case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: default: break; @@ -1529,6 +2057,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.timestamp = decoder->timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; + decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; if (err) decoder->state.from_ip = decoder->ip; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 4c4880230cc9..02c38fec1c37 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -36,6 +36,7 @@ enum intel_pt_period_type { INTEL_PT_PERIOD_NONE, INTEL_PT_PERIOD_INSTRUCTIONS, INTEL_PT_PERIOD_TICKS, + INTEL_PT_PERIOD_MTC, }; enum { @@ 
-58,6 +59,7 @@ struct intel_pt_state { uint64_t from_ip; uint64_t to_ip; uint64_t cr3; + uint64_t tot_insn_cnt; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; @@ -86,6 +88,9 @@ struct intel_pt_params { uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; + unsigned int mtc_period; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 46980fc663ac..9e4eb8fcd559 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -20,7 +20,7 @@ #include "event.h" -#include <asm/insn.h> +#include "insn.h" #include "inat.c" #include "insn.c" diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 988c82c6652d..b1257c816310 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -24,6 +24,8 @@ #define BIT63 ((uint64_t)1 << 63) +#define NR_FLAG BIT63 + #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -46,15 +48,21 @@ static const char * const packet_name[] = { [INTEL_PT_TIP_PGD] = "TIP.PGD", [INTEL_PT_TIP_PGE] = "TIP.PGE", [INTEL_PT_TSC] = "TSC", + [INTEL_PT_TMA] = "TMA", [INTEL_PT_MODE_EXEC] = "MODE.Exec", [INTEL_PT_MODE_TSX] = "MODE.TSX", + [INTEL_PT_MTC] = "MTC", [INTEL_PT_TIP] = "TIP", [INTEL_PT_FUP] = "FUP", + [INTEL_PT_CYC] = "CYC", + [INTEL_PT_VMCS] = "VMCS", [INTEL_PT_PSB] = "PSB", [INTEL_PT_PSBEND] = "PSBEND", [INTEL_PT_CBR] = "CBR", + [INTEL_PT_TRACESTOP] = "TraceSTOP", [INTEL_PT_PIP] = "PIP", [INTEL_PT_OVF] = "OVF", + [INTEL_PT_MNT] = "MNT", }; const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) @@ -96,10 +104,18 @@ static int intel_pt_get_pip(const unsigned char *buf, size_t len, 
packet->type = INTEL_PT_PIP; memcpy_le64(&payload, buf + 2, 6); packet->payload = payload >> 1; + if (payload & 1) + packet->payload |= NR_FLAG; return 8; } +static int intel_pt_get_tracestop(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_TRACESTOP; + return 2; +} + static int intel_pt_get_cbr(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { @@ -110,6 +126,24 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, return 4; } +static int intel_pt_get_vmcs(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + unsigned int count = (52 - 5) >> 3; + + if (count < 1 || count > 7) + return INTEL_PT_BAD_PACKET; + + if (len < count + 2) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_VMCS; + packet->count = count; + memcpy_le64(&packet->payload, buf + 2, count); + + return count + 2; +} + static int intel_pt_get_ovf(struct intel_pt_pkt *packet) { packet->type = INTEL_PT_OVF; @@ -139,12 +173,49 @@ static int intel_pt_get_psbend(struct intel_pt_pkt *packet) return 2; } +static int intel_pt_get_tma(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_TMA; + packet->payload = buf[2] | (buf[3] << 8); + packet->count = buf[5] | ((buf[6] & BIT(0)) << 8); + return 7; +} + static int intel_pt_get_pad(struct intel_pt_pkt *packet) { packet->type = INTEL_PT_PAD; return 1; } +static int intel_pt_get_mnt(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 11) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MNT; + memcpy_le64(&packet->payload, buf + 3, 8); + return 11 +; +} + +static int intel_pt_get_3byte(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 3) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[2]) { + case 0x88: /* MNT */ + return intel_pt_get_mnt(buf, len, packet); + default: + return INTEL_PT_BAD_PACKET; + } +} + static 
int intel_pt_get_ext(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { @@ -156,14 +227,22 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, return intel_pt_get_long_tnt(buf, len, packet); case 0x43: /* PIP */ return intel_pt_get_pip(buf, len, packet); + case 0x83: /* TraceStop */ + return intel_pt_get_tracestop(packet); case 0x03: /* CBR */ return intel_pt_get_cbr(buf, len, packet); + case 0xc8: /* VMCS */ + return intel_pt_get_vmcs(buf, len, packet); case 0xf3: /* OVF */ return intel_pt_get_ovf(packet); case 0x82: /* PSB */ return intel_pt_get_psb(buf, len, packet); case 0x23: /* PSBEND */ return intel_pt_get_psbend(packet); + case 0x73: /* TMA */ + return intel_pt_get_tma(buf, len, packet); + case 0xC3: /* 3-byte header */ + return intel_pt_get_3byte(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -187,6 +266,28 @@ static int intel_pt_get_short_tnt(unsigned int byte, return 1; } +static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, + size_t len, struct intel_pt_pkt *packet) +{ + unsigned int offs = 1, shift; + uint64_t payload = byte >> 3; + + byte >>= 2; + len -= 1; + for (shift = 5; byte & 1; shift += 7) { + if (offs > 9) + return INTEL_PT_BAD_PACKET; + if (len < offs) + return INTEL_PT_NEED_MORE_BYTES; + byte = buf[offs++]; + payload |= (byte >> 1) << shift; + } + + packet->type = INTEL_PT_CYC; + packet->payload = payload; + return offs; +} + static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) @@ -269,6 +370,16 @@ static int intel_pt_get_tsc(const unsigned char *buf, size_t len, return 8; } +static int intel_pt_get_mtc(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MTC; + packet->payload = buf[1]; + return 2; +} + static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, struct 
intel_pt_pkt *packet) { @@ -288,6 +399,9 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, return intel_pt_get_short_tnt(byte, packet); } + if ((byte & 2)) + return intel_pt_get_cyc(byte, buf, len, packet); + switch (byte & 0x1f) { case 0x0D: return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet); @@ -305,6 +419,8 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, return intel_pt_get_mode(buf, len, packet); case 0x19: return intel_pt_get_tsc(buf, len, packet); + case 0x59: + return intel_pt_get_mtc(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -329,7 +445,7 @@ int intel_pt_get_packet(const unsigned char *buf, size_t len, int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t buf_len) { - int ret, i; + int ret, i, nr; unsigned long long payload = packet->payload; const char *name = intel_pt_pkt_name(packet->type); @@ -338,6 +454,7 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, case INTEL_PT_PAD: case INTEL_PT_PSB: case INTEL_PT_PSBEND: + case INTEL_PT_TRACESTOP: case INTEL_PT_OVF: return snprintf(buf, buf_len, "%s", name); case INTEL_PT_TNT: { @@ -371,17 +488,16 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, case INTEL_PT_FUP: if (!(packet->count)) return snprintf(buf, buf_len, "%s no ip", name); + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MTC: + case INTEL_PT_MNT: case INTEL_PT_CBR: - return snprintf(buf, buf_len, "%s 0x%llx", name, payload); case INTEL_PT_TSC: - if (packet->count) - return snprintf(buf, buf_len, - "%s 0x%llx CTC 0x%x FC 0x%x", - name, payload, packet->count & 0xffff, - (packet->count >> 16) & 0x1ff); - else - return snprintf(buf, buf_len, "%s 0x%llx", - name, payload); + return snprintf(buf, buf_len, "%s 0x%llx", name, payload); + case INTEL_PT_TMA: + return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name, + (unsigned)payload, packet->count); case INTEL_PT_MODE_EXEC: return snprintf(buf, 
buf_len, "%s %lld", name, payload); case INTEL_PT_MODE_TSX: @@ -389,8 +505,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, name, (unsigned)(payload >> 1) & 1, (unsigned)payload & 1); case INTEL_PT_PIP: - ret = snprintf(buf, buf_len, "%s 0x%llx", - name, payload); + nr = packet->payload & NR_FLAG ? 1 : 0; + payload &= ~NR_FLAG; + ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", + name, payload, nr); return ret; default: break; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 53404fa942b3..781bb79883bd 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -37,15 +37,21 @@ enum intel_pt_pkt_type { INTEL_PT_TIP_PGD, INTEL_PT_TIP_PGE, INTEL_PT_TSC, + INTEL_PT_TMA, INTEL_PT_MODE_EXEC, INTEL_PT_MODE_TSX, + INTEL_PT_MTC, INTEL_PT_TIP, INTEL_PT_FUP, + INTEL_PT_CYC, + INTEL_PT_VMCS, INTEL_PT_PSB, INTEL_PT_PSBEND, INTEL_PT_CBR, + INTEL_PT_TRACESTOP, INTEL_PT_PIP, INTEL_PT_OVF, + INTEL_PT_MNT, }; struct intel_pt_pkt { diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a5acd2fe2447..bb41c20e6005 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -91,6 +91,11 @@ struct intel_pt { bool synth_needs_swap; u64 tsc_bit; + u64 mtc_bit; + u64 mtc_freq_bits; + u32 tsc_ctc_ratio_n; + u32 tsc_ctc_ratio_d; + u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; }; @@ -126,6 +131,7 @@ struct intel_pt_queue { u64 timestamp; u32 flags; u16 insn_len; + u64 last_insn_cnt; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -567,6 +573,25 @@ static bool intel_pt_return_compression(struct intel_pt *pt) return true; } +static unsigned int intel_pt_mtc_period(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + unsigned int shift; + u64 config; + + if (!pt->mtc_freq_bits) + return 0; + + for (shift = 0, config = pt->mtc_freq_bits; 
!(config & 1); shift++) + config >>= 1; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) + return (config & pt->mtc_freq_bits) >> shift; + } + return 0; +} + static bool intel_pt_timeless_decoding(struct intel_pt *pt) { struct perf_evsel *evsel; @@ -667,6 +692,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; + params.mtc_period = intel_pt_mtc_period(pt); + params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; + params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; if (pt->synth_opts.instructions) { if (pt->synth_opts.period) { @@ -920,11 +948,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.addr = ptq->state->to_ip; sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; - sample.period = ptq->pt->instructions_sample_period; + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; sample.cpu = ptq->cpu; sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; + if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, pt->synth_opts.callchain_sz, sample.ip); @@ -1748,16 +1778,20 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) } static const char * const intel_pt_info_fmts[] = { - [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", - [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", - [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", - [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", - [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", - [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", - [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", - [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", - 
[INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", + [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", + [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", + [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", + [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", + [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", + [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", }; static void intel_pt_print_info(u64 *arr, int start, int finish) @@ -1809,6 +1843,17 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, INTEL_PT_PER_CPU_MMAPS); + if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + + (sizeof(u64) * INTEL_PT_CYC_BIT)) { + pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; + pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; + pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; + pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; + pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, + INTEL_PT_CYC_BIT); + } + pt->timeless_decoding = intel_pt_timeless_decoding(pt); pt->have_tsc = intel_pt_have_tsc(pt); pt->sampling_mode = false; diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h index a1bfe93473ba..0065949df693 100644 --- a/tools/perf/util/intel-pt.h +++ b/tools/perf/util/intel-pt.h @@ -29,6 +29,11 @@ enum { INTEL_PT_HAVE_SCHED_SWITCH, INTEL_PT_SNAPSHOT_MODE, INTEL_PT_PER_CPU_MMAPS, + 
INTEL_PT_MTC_BIT, + INTEL_PT_MTC_FREQ_BITS, + INTEL_PT_TSC_CTC_N, + INTEL_PT_TSC_CTC_D, + INTEL_PT_CYC_BIT, INTEL_PT_AUXTRACE_PRIV_MAX, }; diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 52be201b9b25..b1b9e2385f4b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe) else if (last_ts <= limit) oe->last = list_entry(head->prev, struct ordered_event, list); + if (show_progress) + ui_progress__finish(); + return 0; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f07374bc9c5a..eb5f18b75402 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -515,7 +515,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, if (ret < 0) goto error; addr += stext; - } else { + } else if (tp->symbol) { addr = kernel_get_symbol_address_by_name(tp->symbol, false); if (addr == 0) goto error; @@ -1194,15 +1194,37 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) *ptr++ = '\0'; } - tmp = strdup(arg); - if (tmp == NULL) - return -ENOMEM; + if (arg[0] == '\0') + tmp = NULL; + else { + tmp = strdup(arg); + if (tmp == NULL) + return -ENOMEM; + } if (file_spec) pp->file = tmp; - else + else { pp->function = tmp; + /* + * Keep pp->function even if this is an absolute address, + * so it can mark whether abs_address is valid. + * This makes 'perf probe lib.bin 0x0' possible. + * + * Note that checking length of tmp is not needed + * because when we access tmp[1] we know tmp[0] is '0', + * so tmp[1] should always be valid (but could be '\0'). 
+ */ + if (tmp && !strncmp(tmp, "0x", 2)) { + pp->abs_address = strtoul(pp->function, &tmp, 0); + if (*tmp != '\0') { + semantic_error("Invalid absolute address.\n"); + return -EINVAL; + } + } + } + /* Parse other options */ while (ptr) { arg = ptr; @@ -1519,9 +1541,31 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) } else p = argv[1]; fmt1_str = strtok_r(p, "+", &fmt); - if (fmt1_str[0] == '0') /* only the address started with 0x */ - tp->address = strtoul(fmt1_str, NULL, 0); - else { + /* only the address started with 0x */ + if (fmt1_str[0] == '0') { + /* + * Fix a special case: + * if address == 0, kernel reports something like: + * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax + * Newer kernel may fix that, but we want to + * support old kernel also. + */ + if (strcmp(fmt1_str, "0x") == 0) { + if (!argv[2] || strcmp(argv[2], "(null)")) { + ret = -EINVAL; + goto out; + } + tp->address = 0; + + free(argv[2]); + for (i = 2; argv[i + 1] != NULL; i++) + argv[i] = argv[i + 1]; + + argv[i] = NULL; + argc -= 1; + } else + tp->address = strtoul(fmt1_str, NULL, 0); + } else { /* Only the symbol-based probe has offset */ tp->symbol = strdup(fmt1_str); if (tp->symbol == NULL) { @@ -1778,14 +1822,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (len <= 0) goto error; - /* Uprobes must have tp->address and tp->module */ - if (tev->uprobes && (!tp->address || !tp->module)) + /* Uprobes must have tp->module */ + if (tev->uprobes && !tp->module) goto error; + /* + * If tp->address == 0, then this point must be a + * absolute address uprobe. + * try_to_find_absolute_address() should have made + * tp->symbol to "0x0". 
+ */ + if (tev->uprobes && !tp->address) { + if (!tp->symbol || strcmp(tp->symbol, "0x0")) + goto error; + } /* Use the tp->address for uprobes */ if (tev->uprobes) ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", tp->module, tp->address); + else if (!strncmp(tp->symbol, "0x", 2)) + /* Absolute address. See try_to_find_absolute_address() */ + ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", + tp->module ?: "", tp->module ? ":" : "", + tp->address); else ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", @@ -1815,17 +1874,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, { struct symbol *sym = NULL; struct map *map; - u64 addr; + u64 addr = tp->address; int ret = -ENOENT; if (!is_kprobe) { map = dso__new_map(tp->module); if (!map) goto out; - addr = tp->address; sym = map__find_symbol(map, addr, NULL); } else { - addr = kernel_get_symbol_address_by_name(tp->symbol, true); + if (tp->symbol) + addr = kernel_get_symbol_address_by_name(tp->symbol, true); if (addr) { addr += tp->offset; sym = __find_kernel_function(addr, &map); @@ -1848,8 +1907,8 @@ out: } static int convert_to_perf_probe_point(struct probe_trace_point *tp, - struct perf_probe_point *pp, - bool is_kprobe) + struct perf_probe_point *pp, + bool is_kprobe) { char buf[128]; int ret; @@ -1866,7 +1925,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, if (tp->symbol) { pp->function = strdup(tp->symbol); pp->offset = tp->offset; - } else if (!tp->module && !is_kprobe) { + } else { ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); if (ret < 0) return ret; @@ -2305,7 +2364,9 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, if (pev->event) event = pev->event; else - if (pev->point.function && !strisglob(pev->point.function)) + if (pev->point.function && + (strncmp(pev->point.function, "0x", 2) != 0) && + !strisglob(pev->point.function)) event = 
pev->point.function; else event = tev->point.realname; @@ -2572,6 +2633,98 @@ err_out: goto out; } +static int try_to_find_absolute_address(struct perf_probe_event *pev, + struct probe_trace_event **tevs) +{ + struct perf_probe_point *pp = &pev->point; + struct probe_trace_event *tev; + struct probe_trace_point *tp; + int i, err; + + if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2))) + return -EINVAL; + if (perf_probe_event_need_dwarf(pev)) + return -EINVAL; + + /* + * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at + * absolute address. + * + * Only one tev can be generated by this. + */ + *tevs = zalloc(sizeof(*tev)); + if (!*tevs) + return -ENOMEM; + + tev = *tevs; + tp = &tev->point; + + /* + * Don't use tp->offset, use address directly, because + * in synthesize_probe_trace_command() address cannot be + * zero. + */ + tp->address = pev->point.abs_address; + tp->retprobe = pp->retprobe; + tev->uprobes = pev->uprobes; + + err = -ENOMEM; + /* + * Give it a '0x' leading symbol name. + * In __add_probe_trace_events, a NULL symbol is interpreted as + * invalud. 
+ */ + if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) + goto errout; + + /* For kprobe, check range */ + if ((!tev->uprobes) && + (kprobe_warn_out_range(tev->point.symbol, + tev->point.address))) { + err = -EACCES; + goto errout; + } + + if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) + goto errout; + + if (pev->target) { + tp->module = strdup(pev->target); + if (!tp->module) + goto errout; + } + + if (tev->group) { + tev->group = strdup(pev->group); + if (!tev->group) + goto errout; + } + + if (pev->event) { + tev->event = strdup(pev->event); + if (!tev->event) + goto errout; + } + + tev->nargs = pev->nargs; + tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); + if (!tev->args) { + err = -ENOMEM; + goto errout; + } + for (i = 0; i < tev->nargs; i++) + copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]); + + return 1; + +errout: + if (*tevs) { + clear_probe_trace_events(*tevs, 1); + *tevs = NULL; + } + return err; +} + bool __weak arch__prefers_symtab(void) { return false; } static int convert_to_probe_trace_events(struct perf_probe_event *pev, @@ -2588,6 +2741,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, } } + ret = try_to_find_absolute_address(pev, tevs); + if (ret > 0) + return ret; + if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { ret = find_probe_trace_events_from_map(pev, tevs); if (ret > 0) @@ -2758,3 +2915,22 @@ end: return ret; } +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar) +{ + tvar->value = strdup(pvar->var); + if (tvar->value == NULL) + return -ENOMEM; + if (pvar->type) { + tvar->type = strdup(pvar->type); + if (tvar->type == NULL) + return -ENOMEM; + } + if (pvar->name) { + tvar->name = strdup(pvar->name); + if (tvar->name == NULL) + return -ENOMEM; + } else + tvar->name = NULL; + return 0; +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 83ee95e9743b..6e7ec68a4aa8 100644 --- 
a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -59,6 +59,7 @@ struct perf_probe_point { bool retprobe; /* Return probe flag */ char *lazy_line; /* Lazy matching pattern */ unsigned long offset; /* Offset from function entry */ + unsigned long abs_address; /* Absolute address of the point */ }; /* Perf probe probing argument field chain */ @@ -156,4 +157,7 @@ int e_snprintf(char *str, size_t size, const char *format, ...) /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar); + #endif /*_PROBE_EVENT_H */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 7b80f8cb62b9..29c43c0680a8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -553,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) char buf[32], *ptr; int ret = 0; - if (!is_c_varname(pf->pvar->var)) { - /* Copy raw parameters */ - pf->tvar->value = strdup(pf->pvar->var); - if (pf->tvar->value == NULL) - return -ENOMEM; - if (pf->pvar->type) { - pf->tvar->type = strdup(pf->pvar->type); - if (pf->tvar->type == NULL) - return -ENOMEM; - } - if (pf->pvar->name) { - pf->tvar->name = strdup(pf->pvar->name); - if (pf->tvar->name == NULL) - return -ENOMEM; - } else - pf->tvar->name = NULL; - return 0; - } + /* Copy raw parameters */ + if (!is_c_varname(pf->pvar->var)) + return copy_to_probe_trace_arg(pf->tvar, pf->pvar); if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 42e98ab5a9bb..46ae0532a8a6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -441,6 +441,16 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, return &s->sym; } +void dso__reset_find_symbol_cache(struct dso *dso) +{ + enum map_type type; + + for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { + 
dso->last_find_result[type].addr = 0; + dso->last_find_result[type].symbol = NULL; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { |