summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/build/Documentation/Build.txt1
-rw-r--r--tools/build/Makefile.build2
-rw-r--r--tools/build/tests/ex/Build1
-rw-r--r--tools/perf/Documentation/intel-pt.txt194
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c271
-rw-r--r--tools/perf/builtin-annotate.c1
-rw-r--r--tools/perf/ui/tui/progress.c19
-rw-r--r--tools/perf/util/dso.h2
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.c2
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.h2
-rw-r--r--tools/perf/util/intel-pt-decoder/inat_types.h29
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.c4
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.h2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c555
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h5
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c142
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h6
-rw-r--r--tools/perf/util/intel-pt.c67
-rw-r--r--tools/perf/util/intel-pt.h5
-rw-r--r--tools/perf/util/ordered-events.c3
-rw-r--r--tools/perf/util/probe-event.c210
-rw-r--r--tools/perf/util/probe-event.h4
-rw-r--r--tools/perf/util/probe-finder.c21
-rw-r--r--tools/perf/util/symbol.c10
26 files changed, 1465 insertions, 96 deletions
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt
index 00ad2d608727..aa5e092c4352 100644
--- a/tools/build/Documentation/Build.txt
+++ b/tools/build/Documentation/Build.txt
@@ -66,6 +66,7 @@ To follow the above example, the user provides following 'Build' files:
ex/Build:
ex-y += a.o
ex-y += b.o
+ ex-y += b.o # duplicates in the lists are allowed
libex-y += c.o
libex-y += d.o
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 8120af9c0341..0c5f485521d6 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -63,7 +63,7 @@ quiet_cmd_gen = GEN $@
# If there's nothing to link, create empty $@ object.
quiet_cmd_ld_multi = LD $@
cmd_ld_multi = $(if $(strip $(obj-y)),\
- $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@)
+ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
# Build rules
$(OUTPUT)%.o: %.c FORCE
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
index 70d876237c57..429c7d452101 100644
--- a/tools/build/tests/ex/Build
+++ b/tools/build/tests/ex/Build
@@ -1,6 +1,7 @@
ex-y += ex.o
ex-y += a.o
ex-y += b.o
+ex-y += b.o
ex-y += empty/
ex-y += empty2/
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 2866b62eb293..4a0501d7a3b4 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -142,19 +142,21 @@ which is the same as
-e intel_pt/tsc=1,noretcomp=0/
+Note there are now new config terms - see section 'config terms' further below.
+
The config terms are listed in /sys/devices/intel_pt/format. They are bit
fields within the config member of the struct perf_event_attr which is
passed to the kernel by the perf_event_open system call. They correspond to bit
fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
- $ for f in `ls /sys/devices/intel_pt/format`;do
- > echo $f
- > cat /sys/devices/intel_pt/format/$f
- > done
- noretcomp
- config:11
- tsc
- config:10
+ $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+ /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+ /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+ /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+ /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+ /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+ /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+ /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
Note that the default config must be overridden for each term i.e.
@@ -209,9 +211,185 @@ perf_event_attr is displayed if the -vv option is used e.g.
------------------------------------------------------------
+config terms
+------------
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflect in new config terms. All the config terms are
+described below.
+
+tsc Always supported. Produces TSC timestamp packets to provide
+ timing information. In some cases it is possible to decode
+ without timing information, for example a per-thread context
+ that does not overlap executable memory maps.
+
+ The default config selects tsc (i.e. tsc=1).
+
+noretcomp Always supported. Disables "return compression" so a TIP packet
+ is produced when a function returns. Causes more packets to be
+ produced but might make decoding more reliable.
+
+ The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period Allows the frequency of PSB packets to be specified.
+
+ The PSB packet is a synchronization packet that provides a
+ starting point for decoding or recovery from errors.
+
+ Support for psb_period is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and "0"
+ otherwise.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The psb_period value is converted to the approximate number of
+ trace bytes between PSB packets as:
+
+ 2 ^ (value + 11)
+
+ e.g. value 3 means 16KiB bytes between PSBs
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/psb_period=15/u uname
+ Invalid psb_period for intel_pt. Valid values are: 0-5
+
+ If MTC packets are selected, the default config selects a value
+ of 3 (i.e. psb_period=3) or the nearest lower value that is
+ supported (0 is always supported). Otherwise the default is 0.
+
+ If decoding is expected to be reliable and the buffer is large
+ then a large PSB period can be used.
+
+ Because a TSC packet is produced with PSB, the PSB period can
+ also affect the granularity to timing information in the absence
+ of MTC or CYC.
+
+mtc Produces MTC timing packets.
+
+ MTC packets provide finer grain timestamp information than TSC
+ packets. MTC packets record time using the hardware crystal
+ clock (CTC) which is related to TSC packets using a TMA packet.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The frequency of MTC packets can also be specified - see
+ mtc_period below.
+
+mtc_period Specifies how frequently MTC packets are produced - see mtc
+ above for how to determine if MTC packets are supported.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The mtc_period value is converted to the MTC frequency as:
+
+ CTC-frequency / (2 ^ value)
+
+ e.g. value 3 means one eighth of CTC-frequency
+
+ Where CTC is the hardware crystal clock, the frequency of which
+ can be related to TSC via values provided in cpuid leaf 0x15.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/mtc_period=15/u uname
+ Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+ The default value is 3 or the nearest lower value
+ that is supported (0 is always supported).
+
+cyc Produces CYC timing packets.
+
+ CYC packets provide even finer grain timestamp information than
+ MTC and TSC packets. A CYC packet contains the number of CPU
+ cycles since the last CYC packet. Unlike MTC and TSC packets,
+ CYC packets are only sent when another packet is also sent.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The number of CYC packets produced can be reduced by specifying
+ a threshold - see cyc_thresh below.
+
+cyc_thresh Specifies how frequently CYC packets are produced - see cyc
+ above for how to determine if CYC packets are supported.
+
+ Valid cyc_thresh values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The cyc_thresh value represents the minimum number of CPU cycles
+ that must have passed before a CYC packet can be sent. The
+ number of CPU cycles is:
+
+ 2 ^ (value - 1)
+
+ e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+ can be sent. Note a CYC packet is still only sent when another
+ packet is sent, not at, e.g. every 8 CPU cycles.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+ Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+ CYC packets are not requested by default.
+
+no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR.
+
+ It stops the driver resetting the byte count to zero whenever
+ enabling the trace (for example on context switches) which in
+ turn results in no PSB being forced. However some processors
+ will produce a PSB anyway.
+
+ In any case, there is still a PSB when the trace is enabled for
+ the first time.
+
+ no_force_psb can be used to slightly decrease the trace size but
+ may make it harder for the decoder to recover from errors.
+
+ no_force_psb is not selected by default.
+
+
new snapshot option
-------------------
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
To select snapshot mode a new option has been added:
-S
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index f31f15a5f873..af009bd6e6b7 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -41,7 +41,6 @@ tools/include/asm-generic/bitops.h
tools/include/linux/atomic.h
tools/include/linux/bitops.h
tools/include/linux/compiler.h
-tools/include/linux/export.h
tools/include/linux/hash.h
tools/include/linux/kernel.h
tools/include/linux/list.h
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index da7d2c15e611..2ca10d796c0b 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -18,6 +18,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
+#include <cpuid.h>
#include "../../perf.h"
#include "../../util/session.h"
@@ -99,17 +100,137 @@ static int intel_pt_parse_terms(struct list_head *formats, const char *str,
return intel_pt_parse_terms_with_default(formats, str, config);
}
-static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused,
- struct perf_evlist *evlist __maybe_unused)
+static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
- return 256;
+ const u64 top_bit = 1ULL << 63;
+ u64 res = 0;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (mask & top_bit) {
+ res <<= 1;
+ if (bits & top_bit)
+ res |= 1;
+ }
+ mask <<= 1;
+ bits <<= 1;
+ }
+
+ return res;
+}
+
+static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
+ struct perf_evlist *evlist, u64 *res)
+{
+ struct perf_evsel *evsel;
+ u64 mask;
+
+ *res = 0;
+
+ mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
+ if (!mask)
+ return -EINVAL;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type) {
+ *res = intel_pt_masked_bits(mask, evsel->attr.config);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
+ struct perf_evlist *evlist)
+{
+ u64 val;
+ int err, topa_multiple_entries;
+ size_t psb_period;
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
+ "%d", &topa_multiple_entries) != 1)
+ topa_multiple_entries = 0;
+
+ /*
+ * Use caps/topa_multiple_entries to indicate early hardware that had
+ * extra frequent PSBs.
+ */
+ if (!topa_multiple_entries) {
+ psb_period = 256;
+ goto out;
+ }
+
+ err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
+ if (err)
+ val = 0;
+
+ psb_period = 1 << (val + 11);
+out:
+ pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
+ return psb_period;
+}
+
+static int intel_pt_pick_bit(int bits, int target)
+{
+ int pos, pick = -1;
+
+ for (pos = 0; bits; bits >>= 1, pos++) {
+ if (bits & 1) {
+ if (pos <= target || pick < 0)
+ pick = pos;
+ if (pos >= target)
+ break;
+ }
+ }
+
+ return pick;
}
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
+ char buf[256];
+ int mtc, mtc_periods = 0, mtc_period;
+ int psb_cyc, psb_periods, psb_period;
+ int pos = 0;
u64 config;
- intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
+ &mtc) != 1)
+ mtc = 1;
+
+ if (mtc) {
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
+ &mtc_periods) != 1)
+ mtc_periods = 0;
+ if (mtc_periods) {
+ mtc_period = intel_pt_pick_bit(mtc_periods, 3);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos,
+ ",mtc,mtc_period=%d", mtc_period);
+ }
+ }
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
+ &psb_cyc) != 1)
+ psb_cyc = 1;
+
+ if (psb_cyc && mtc_periods) {
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
+ &psb_periods) != 1)
+ psb_periods = 0;
+ if (psb_periods) {
+ psb_period = intel_pt_pick_bit(psb_periods, 3);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos,
+ ",psb_period=%d", psb_period);
+ }
+ }
+
+ pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+
return config;
}
@@ -157,6 +278,15 @@ static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused
return INTEL_PT_AUXTRACE_PRIV_SIZE;
}
+static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ *n = ebx;
+ *d = eax;
+}
+
static int intel_pt_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct auxtrace_info_event *auxtrace_info,
@@ -168,7 +298,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
struct perf_event_mmap_page *pc;
struct perf_tsc_conversion tc = { .time_mult = 0, };
bool cap_user_time_zero = false, per_cpu_mmaps;
- u64 tsc_bit, noretcomp_bit;
+ u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
+ u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
int err;
if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
@@ -177,6 +308,12 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
&noretcomp_bit);
+ intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+ mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
+ "mtc_period");
+ intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+
+ intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
if (!session->evlist->nr_mmaps)
return -EINVAL;
@@ -207,6 +344,11 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
+ auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
+ auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
+ auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
+ auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
+ auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
return 0;
}
@@ -239,6 +381,117 @@ static int intel_pt_track_switches(struct perf_evlist *evlist)
return 0;
}
+static void intel_pt_valid_str(char *str, size_t len, u64 valid)
+{
+ unsigned int val, last = 0, state = 1;
+ int p = 0;
+
+ str[0] = '\0';
+
+ for (val = 0; val <= 64; val++, valid >>= 1) {
+ if (valid & 1) {
+ last = val;
+ switch (state) {
+ case 0:
+ p += scnprintf(str + p, len - p, ",");
+ /* Fall through */
+ case 1:
+ p += scnprintf(str + p, len - p, "%u", val);
+ state = 2;
+ break;
+ case 2:
+ state = 3;
+ break;
+ case 3:
+ state = 4;
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (state) {
+ case 3:
+ p += scnprintf(str + p, len - p, ",%u", last);
+ state = 0;
+ break;
+ case 4:
+ p += scnprintf(str + p, len - p, "-%u", last);
+ state = 0;
+ break;
+ default:
+ break;
+ }
+ if (state != 1)
+ state = 0;
+ }
+ }
+}
+
+static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
+ const char *caps, const char *name,
+ const char *supported, u64 config)
+{
+ char valid_str[256];
+ unsigned int shift;
+ unsigned long long valid;
+ u64 bits;
+ int ok;
+
+ if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
+ valid = 0;
+
+ if (supported &&
+ perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
+ valid = 0;
+
+ valid |= 1;
+
+ bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
+
+ config &= bits;
+
+ for (shift = 0; bits && !(bits & 1); shift++)
+ bits >>= 1;
+
+ config >>= shift;
+
+ if (config > 63)
+ goto out_err;
+
+ if (valid & (1 << config))
+ return 0;
+out_err:
+ intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
+ pr_err("Invalid %s for %s. Valid values are: %s\n",
+ name, INTEL_PT_PMU_NAME, valid_str);
+ return -EINVAL;
+}
+
+static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
+ struct perf_evsel *evsel)
+{
+ int err;
+
+ if (!evsel)
+ return 0;
+
+ err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
+ "cyc_thresh", "caps/psb_cyc",
+ evsel->attr.config);
+ if (err)
+ return err;
+
+ err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
+ "mtc_period", "caps/mtc",
+ evsel->attr.config);
+ if (err)
+ return err;
+
+ return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
+ "psb_period", "caps/psb_cyc",
+ evsel->attr.config);
+}
+
static int intel_pt_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
@@ -251,6 +504,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
const struct cpu_map *cpus = evlist->cpus;
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
u64 tsc_bit;
+ int err;
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
@@ -281,6 +535,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
+ err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
+ if (err)
+ return err;
+
/* Set default sizes for snapshot mode */
if (opts->auxtrace_snapshot_mode) {
size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
@@ -366,8 +624,6 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* threads.
*/
if (have_timing_info && !cpu_map__empty(cpus)) {
- int err;
-
err = intel_pt_track_switches(evlist);
if (err == -EPERM)
pr_debug2("Unable to select sched:sched_switch\n");
@@ -394,7 +650,6 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
struct perf_evsel *tracking_evsel;
- int err;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index a32a64ef08e2..8edc205ff9a7 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
rb_erase(&al->sym->rb_node,
&al->map->dso->symbols[al->map->type]);
symbol__delete(al->sym);
+ dso__reset_find_symbol_cache(al->map->dso);
}
return 0;
}
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
index c61d14b101e0..c4b99008e2c9 100644
--- a/tools/perf/ui/tui/progress.c
+++ b/tools/perf/ui/tui/progress.c
@@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p)
pthread_mutex_unlock(&ui__lock);
}
+static void tui_progress__finish(void)
+{
+ int y;
+
+ if (use_browser <= 0)
+ return;
+
+ ui__refresh_dimensions(false);
+ pthread_mutex_lock(&ui__lock);
+ y = SLtt_Screen_Rows / 2 - 2;
+ SLsmg_set_color(0);
+ SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' ');
+ SLsmg_refresh();
+ pthread_mutex_unlock(&ui__lock);
+}
+
static struct ui_progress_ops tui_progress__ops =
{
- .update = tui_progress__update,
+ .update = tui_progress__update,
+ .finish = tui_progress__finish,
};
void tui_progress__init(void)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c73276db6d6f..fc8db9c764ac 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -324,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
struct dso *dsos__findnew(struct dsos *dsos, const char *name);
bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+void dso__reset_find_symbol_cache(struct dso *dso);
+
size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
size_t __dsos__fprintf(struct list_head *head, FILE *fp);
diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c
index feeaa509dfe4..906d94aa0a24 100644
--- a/tools/perf/util/intel-pt-decoder/inat.c
+++ b/tools/perf/util/intel-pt-decoder/inat.c
@@ -18,7 +18,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
-#include <asm/insn.h>
+#include "insn.h"
/* Attribute tables are generated from opcode map */
#include "inat-tables.c"
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
index 74a2e312e8a2..611645e903a8 100644
--- a/tools/perf/util/intel-pt-decoder/inat.h
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -20,7 +20,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
-#include <asm/inat_types.h>
+#include "inat_types.h"
/*
* Internal bits. Don't use bitmasks directly, because these bits are
diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
index 8f72b334aea0..47314a64399c 100644
--- a/tools/perf/util/intel-pt-decoder/insn.c
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -23,8 +23,8 @@
#else
#include <string.h>
#endif
-#include <asm/inat.h>
-#include <asm/insn.h>
+#include "inat.h"
+#include "insn.h"
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index e7814b74caf8..dd12da0f4593 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -21,7 +21,7 @@
*/
/* insn_attr_t is defined in inat.h */
-#include <asm/inat.h>
+#include "inat.h"
struct insn_field {
union {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f8ac462fec1a..22ba50224319 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -85,7 +85,10 @@ struct intel_pt_decoder {
const unsigned char *buf;
size_t len;
bool return_compression;
+ bool mtc_insn;
bool pge;
+ bool have_tma;
+ bool have_cyc;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
@@ -94,20 +97,37 @@ struct intel_pt_decoder {
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
uint64_t ret_addr;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t cycle_cnt;
+ uint64_t cyc_ref_timestamp;
+ uint32_t last_mtc;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ uint32_t tsc_ctc_mult;
+ uint32_t tsc_slip;
+ uint32_t ctc_rem_mask;
+ int mtc_shift;
struct intel_pt_stack stack;
enum intel_pt_pkt_state pkt_state;
struct intel_pt_pkt packet;
struct intel_pt_pkt tnt;
int pkt_step;
int pkt_len;
+ int last_packet_type;
unsigned int cbr;
unsigned int max_non_turbo_ratio;
+ double max_non_turbo_ratio_fp;
+ double cbr_cyc_to_tsc;
+ double calc_cyc_to_tsc;
+ bool have_calc_cyc_to_tsc;
int exec_mode;
unsigned int insn_bytes;
uint64_t sign_bit;
uint64_t sign_bits;
uint64_t period;
enum intel_pt_period_type period_type;
+ uint64_t tot_insn_cnt;
uint64_t period_insn_cnt;
uint64_t period_mask;
uint64_t period_ticks;
@@ -148,6 +168,13 @@ static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
}
}
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+ if (!d)
+ return 0;
+ return (t / d) * n + ((t % d) * n) / d;
+}
+
struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
{
struct intel_pt_decoder *decoder;
@@ -170,10 +197,44 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->period = params->period;
decoder->period_type = params->period_type;
- decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+ decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+ decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
intel_pt_setup_period(decoder);
+ decoder->mtc_shift = params->mtc_period;
+ decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+ decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+ decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+ if (!decoder->tsc_ctc_ratio_n)
+ decoder->tsc_ctc_ratio_d = 0;
+
+ if (decoder->tsc_ctc_ratio_d) {
+ if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+ decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+ decoder->tsc_ctc_ratio_d;
+
+ /*
+ * Allow for timestamps appearing to backwards because a TSC
+ * packet has slipped past a MTC packet, so allow 2 MTC ticks
+ * or ...
+ */
+ decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ /* ... or 0x100 paranoia */
+ if (decoder->tsc_slip < 0x100)
+ decoder->tsc_slip = 0x100;
+
+ intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+ intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+ intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+ intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+ intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
return decoder;
}
@@ -367,6 +428,7 @@ static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
{
intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
decoder->pkt_len = 1;
decoder->pkt_step = 1;
intel_pt_decoder_log_packet(decoder);
@@ -399,6 +461,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
decoder->ref_timestamp = buffer.ref_timestamp;
decoder->timestamp = 0;
+ decoder->have_tma = false;
decoder->state.trace_nr = buffer.trace_nr;
intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
decoder->ref_timestamp);
@@ -460,10 +523,247 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
return ret;
}
+struct intel_pt_pkt_info {
+ struct intel_pt_decoder *decoder;
+ struct intel_pt_pkt packet;
+ uint64_t pos;
+ int pkt_len;
+ int last_packet_type;
+ void *data;
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
+
+/* Lookahead packets in current buffer */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+ intel_pt_pkt_cb_t cb, void *data)
+{
+ struct intel_pt_pkt_info pkt_info;
+ const unsigned char *buf = decoder->buf;
+ size_t len = decoder->len;
+ int ret;
+
+ pkt_info.decoder = decoder;
+ pkt_info.pos = decoder->pos;
+ pkt_info.pkt_len = decoder->pkt_step;
+ pkt_info.last_packet_type = decoder->last_packet_type;
+ pkt_info.data = data;
+
+ while (1) {
+ do {
+ pkt_info.pos += pkt_info.pkt_len;
+ buf += pkt_info.pkt_len;
+ len -= pkt_info.pkt_len;
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ if (!ret)
+ return INTEL_PT_NEED_MORE_BYTES;
+ if (ret < 0)
+ return ret;
+
+ pkt_info.pkt_len = ret;
+ } while (pkt_info.packet.type == INTEL_PT_PAD);
+
+ ret = cb(&pkt_info);
+ if (ret)
+ return 0;
+
+ pkt_info.last_packet_type = pkt_info.packet.type;
+ }
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {
+ uint64_t cycle_cnt;
+ unsigned int cbr;
+ uint32_t last_mtc;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t tsc_timestamp;
+ uint64_t timestamp;
+ bool have_tma;
+ bool from_mtc;
+ double cbr_cyc_to_tsc;
+};
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_decoder *decoder = pkt_info->decoder;
+ struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+ uint64_t timestamp;
+ double cyc_to_tsc;
+ unsigned int cbr;
+ uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PIP:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PAD:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ return 0;
+
+ case INTEL_PT_MTC:
+ if (!data->have_tma)
+ return 0;
+
+ mtc = pkt_info->packet.payload;
+ if (mtc > data->last_mtc)
+ mtc_delta = mtc - data->last_mtc;
+ else
+ mtc_delta = mtc + 256 - data->last_mtc;
+ data->ctc_delta += mtc_delta << decoder->mtc_shift;
+ data->last_mtc = mtc;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = data->ctc_timestamp +
+ data->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = data->ctc_timestamp +
+ multdiv(data->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < data->timestamp)
+ return 1;
+
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ data->timestamp = timestamp;
+ return 0;
+ }
+
+ break;
+
+ case INTEL_PT_TSC:
+ timestamp = pkt_info->packet.payload |
+ (data->timestamp & (0xffULL << 56));
+ if (data->from_mtc && timestamp < data->timestamp &&
+ data->timestamp - timestamp < decoder->tsc_slip)
+ return 1;
+ while (timestamp < data->timestamp)
+ timestamp += (1ULL << 56);
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ if (data->from_mtc)
+ return 1;
+ data->tsc_timestamp = timestamp;
+ data->timestamp = timestamp;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TMA:
+ if (data->from_mtc)
+ return 1;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return 0;
+
+ ctc = pkt_info->packet.payload;
+ fc = pkt_info->packet.count;
+ ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+ data->ctc_timestamp = data->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ data->ctc_timestamp -=
+ multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ data->ctc_delta = 0;
+ data->have_tma = true;
+
+ return 0;
+
+ case INTEL_PT_CYC:
+ data->cycle_cnt += pkt_info->packet.payload;
+ return 0;
+
+ case INTEL_PT_CBR:
+ cbr = pkt_info->packet.payload;
+ if (data->cbr && data->cbr != cbr)
+ return 1;
+ data->cbr = cbr;
+ data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ return 1;
+ }
+
+ if (!data->cbr && decoder->cbr) {
+ data->cbr = decoder->cbr;
+ data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+ }
+
+ if (!data->cycle_cnt)
+ return 1;
+
+ cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+ if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+ cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ return 1;
+ }
+
+ decoder->calc_cyc_to_tsc = cyc_to_tsc;
+ decoder->have_calc_cyc_to_tsc = true;
+
+ if (data->cbr) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ } else {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+ cyc_to_tsc, pkt_info->pos);
+ }
+
+ return 1;
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+ bool from_mtc)
+{
+ struct intel_pt_calc_cyc_to_tsc_info data = {
+ .cycle_cnt = 0,
+ .cbr = 0,
+ .last_mtc = decoder->last_mtc,
+ .ctc_timestamp = decoder->ctc_timestamp,
+ .ctc_delta = decoder->ctc_delta,
+ .tsc_timestamp = decoder->tsc_timestamp,
+ .timestamp = decoder->timestamp,
+ .have_tma = decoder->have_tma,
+ .from_mtc = from_mtc,
+ .cbr_cyc_to_tsc = 0,
+ };
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
{
int ret;
+ decoder->last_packet_type = decoder->packet.type;
+
do {
decoder->pos += decoder->pkt_step;
decoder->buf += decoder->pkt_step;
@@ -522,6 +822,7 @@ static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
case INTEL_PT_PERIOD_TICKS:
return intel_pt_next_period(decoder);
case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
default:
return 0;
}
@@ -541,6 +842,7 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
decoder->last_masked_timestamp = masked_timestamp;
break;
case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
default:
break;
}
@@ -554,11 +856,15 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
uint64_t max_insn_cnt, insn_cnt = 0;
int err;
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
max_insn_cnt = intel_pt_next_sample(decoder);
err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
max_insn_cnt, decoder->data);
+ decoder->tot_insn_cnt += insn_cnt;
decoder->timestamp_insn_cnt += insn_cnt;
decoder->period_insn_cnt += insn_cnt;
@@ -859,6 +1165,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t timestamp;
+ decoder->have_tma = false;
+
if (decoder->ref_timestamp) {
timestamp = decoder->packet.payload |
(decoder->ref_timestamp & (0xffULL << 56));
@@ -876,21 +1184,29 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
} else if (decoder->timestamp) {
timestamp = decoder->packet.payload |
(decoder->timestamp & (0xffULL << 56));
+ decoder->tsc_timestamp = timestamp;
if (timestamp < decoder->timestamp &&
- decoder->timestamp - timestamp < 0x100) {
- intel_pt_log_to("ERROR: Suppressing backwards timestamp",
+ decoder->timestamp - timestamp < decoder->tsc_slip) {
+ intel_pt_log_to("Suppressing backwards timestamp",
timestamp);
timestamp = decoder->timestamp;
}
while (timestamp < decoder->timestamp) {
intel_pt_log_to("Wraparound timestamp", timestamp);
timestamp += (1ULL << 56);
+ decoder->tsc_timestamp = timestamp;
}
- decoder->tsc_timestamp = timestamp;
decoder->timestamp = timestamp;
decoder->timestamp_insn_cnt = 0;
}
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, false);
+ }
+
intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
@@ -898,11 +1214,117 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
{
intel_pt_log("ERROR: Buffer overflow\n");
intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ decoder->cbr = 0;
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
decoder->overflow = true;
return -EOVERFLOW;
}
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+ uint32_t ctc = decoder->packet.payload;
+ uint32_t fc = decoder->packet.count;
+ uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return;
+
+ decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+ decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ decoder->ctc_timestamp -= multdiv(ctc_rem,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ decoder->ctc_delta = 0;
+ decoder->have_tma = true;
+ intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
+ decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+ uint32_t mtc, mtc_delta;
+
+ if (!decoder->have_tma)
+ return;
+
+ mtc = decoder->packet.payload;
+
+ if (mtc > decoder->last_mtc)
+ mtc_delta = mtc - decoder->last_mtc;
+ else
+ mtc_delta = mtc + 256 - decoder->last_mtc;
+
+ decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = decoder->ctc_timestamp +
+ decoder->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = decoder->ctc_timestamp +
+ multdiv(decoder->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+ decoder->last_mtc = mtc;
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, true);
+ }
+}
+
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+ unsigned int cbr = decoder->packet.payload;
+
+ if (decoder->cbr == cbr)
+ return;
+
+ decoder->cbr = cbr;
+ decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+}
+
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp = decoder->cyc_ref_timestamp;
+
+ decoder->have_cyc = true;
+
+ decoder->cycle_cnt += decoder->packet.payload;
+
+ if (!decoder->cyc_ref_timestamp)
+ return;
+
+ if (decoder->have_calc_cyc_to_tsc)
+ timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+ else if (decoder->cbr)
+ timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+ else
+ return;
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+}
+
/* Walk PSB+ packets when already in sync. */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
@@ -921,8 +1343,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
case INTEL_PT_TNT:
+ case INTEL_PT_TRACESTOP:
case INTEL_PT_BAD:
case INTEL_PT_PSB:
+ decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
return -EAGAIN;
@@ -933,8 +1357,12 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
intel_pt_calc_tsc_timestamp(decoder);
break;
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
case INTEL_PT_CBR:
- decoder->cbr = decoder->packet.payload;
+ intel_pt_calc_cbr(decoder);
break;
case INTEL_PT_MODE_EXEC:
@@ -942,7 +1370,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload;
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
break;
case INTEL_PT_FUP:
@@ -954,6 +1382,15 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
intel_pt_update_in_tx(decoder);
break;
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
case INTEL_PT_PAD:
default:
break;
@@ -981,8 +1418,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
switch (decoder->packet.type) {
case INTEL_PT_TNT:
case INTEL_PT_FUP:
+ case INTEL_PT_TRACESTOP:
case INTEL_PT_PSB:
case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
case INTEL_PT_CBR:
case INTEL_PT_MODE_TSX:
case INTEL_PT_BAD:
@@ -1030,13 +1469,25 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
return 0;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload;
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
break;
case INTEL_PT_MODE_EXEC:
decoder->exec_mode = decoder->packet.payload;
break;
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
case INTEL_PT_PAD:
break;
@@ -1120,6 +1571,13 @@ next:
}
return intel_pt_walk_fup_tip(decoder);
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
case INTEL_PT_PSB:
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psbend(decoder);
@@ -1130,15 +1588,43 @@ next:
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload;
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
break;
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+ break;
+ /*
+ * Ensure that there has been an instruction since the
+ * last MTC.
+ */
+ if (!decoder->mtc_insn)
+ break;
+ decoder->mtc_insn = false;
+ /* Ensure that there is a timestamp */
+ if (!decoder->timestamp)
+ break;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->mtc_insn = false;
+ return 0;
+
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
case INTEL_PT_CBR:
- decoder->cbr = decoder->packet.payload;
+ intel_pt_calc_cbr(decoder);
break;
case INTEL_PT_MODE_EXEC:
@@ -1160,6 +1646,8 @@ next:
return intel_pt_bug(decoder);
case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
case INTEL_PT_PAD:
break;
@@ -1200,16 +1688,28 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
}
break;
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
case INTEL_PT_CBR:
- decoder->cbr = decoder->packet.payload;
+ intel_pt_calc_cbr(decoder);
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload;
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
break;
case INTEL_PT_MODE_EXEC:
@@ -1220,7 +1720,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
intel_pt_update_in_tx(decoder);
break;
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
case INTEL_PT_TNT:
+ decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
if (decoder->ip)
decoder->pkt_state = INTEL_PT_STATE_ERR4;
@@ -1238,6 +1743,8 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
return 0;
case INTEL_PT_PSB:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
case INTEL_PT_PAD:
default:
break;
@@ -1280,16 +1787,28 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
intel_pt_set_last_ip(decoder);
break;
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
break;
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
case INTEL_PT_CBR:
- decoder->cbr = decoder->packet.payload;
+ intel_pt_calc_cbr(decoder);
break;
case INTEL_PT_PIP:
- decoder->cr3 = decoder->packet.payload;
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
break;
case INTEL_PT_MODE_EXEC:
@@ -1306,6 +1825,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_BAD: /* Does not happen */
return intel_pt_bug(decoder);
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
case INTEL_PT_PSB:
err = intel_pt_walk_psb(decoder);
if (err)
@@ -1319,6 +1845,8 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_TNT:
case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
case INTEL_PT_PAD:
default:
break;
@@ -1529,6 +2057,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
decoder->state.timestamp = decoder->timestamp;
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
decoder->state.cr3 = decoder->cr3;
+ decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
if (err)
decoder->state.from_ip = decoder->ip;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 4c4880230cc9..02c38fec1c37 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -36,6 +36,7 @@ enum intel_pt_period_type {
INTEL_PT_PERIOD_NONE,
INTEL_PT_PERIOD_INSTRUCTIONS,
INTEL_PT_PERIOD_TICKS,
+ INTEL_PT_PERIOD_MTC,
};
enum {
@@ -58,6 +59,7 @@ struct intel_pt_state {
uint64_t from_ip;
uint64_t to_ip;
uint64_t cr3;
+ uint64_t tot_insn_cnt;
uint64_t timestamp;
uint64_t est_timestamp;
uint64_t trace_nr;
@@ -86,6 +88,9 @@ struct intel_pt_params {
uint64_t period;
enum intel_pt_period_type period_type;
unsigned max_non_turbo_ratio;
+ unsigned int mtc_period;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
};
struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 46980fc663ac..9e4eb8fcd559 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -20,7 +20,7 @@
#include "event.h"
-#include <asm/insn.h>
+#include "insn.h"
#include "inat.c"
#include "insn.c"
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 988c82c6652d..b1257c816310 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -24,6 +24,8 @@
#define BIT63 ((uint64_t)1 << 63)
+#define NR_FLAG BIT63
+
#if __BYTE_ORDER == __BIG_ENDIAN
#define le16_to_cpu bswap_16
#define le32_to_cpu bswap_32
@@ -46,15 +48,21 @@ static const char * const packet_name[] = {
[INTEL_PT_TIP_PGD] = "TIP.PGD",
[INTEL_PT_TIP_PGE] = "TIP.PGE",
[INTEL_PT_TSC] = "TSC",
+ [INTEL_PT_TMA] = "TMA",
[INTEL_PT_MODE_EXEC] = "MODE.Exec",
[INTEL_PT_MODE_TSX] = "MODE.TSX",
+ [INTEL_PT_MTC] = "MTC",
[INTEL_PT_TIP] = "TIP",
[INTEL_PT_FUP] = "FUP",
+ [INTEL_PT_CYC] = "CYC",
+ [INTEL_PT_VMCS] = "VMCS",
[INTEL_PT_PSB] = "PSB",
[INTEL_PT_PSBEND] = "PSBEND",
[INTEL_PT_CBR] = "CBR",
+ [INTEL_PT_TRACESTOP] = "TraceSTOP",
[INTEL_PT_PIP] = "PIP",
[INTEL_PT_OVF] = "OVF",
+ [INTEL_PT_MNT] = "MNT",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -96,10 +104,18 @@ static int intel_pt_get_pip(const unsigned char *buf, size_t len,
packet->type = INTEL_PT_PIP;
memcpy_le64(&payload, buf + 2, 6);
packet->payload = payload >> 1;
+ if (payload & 1)
+ packet->payload |= NR_FLAG;
return 8;
}
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_TRACESTOP;
+ return 2;
+}
+
static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -110,6 +126,24 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
return 4;
}
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int count = (52 - 5) >> 3;
+
+ if (count < 1 || count > 7)
+ return INTEL_PT_BAD_PACKET;
+
+ if (len < count + 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_VMCS;
+ packet->count = count;
+ memcpy_le64(&packet->payload, buf + 2, count);
+
+ return count + 2;
+}
+
static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_OVF;
@@ -139,12 +173,49 @@ static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
return 2;
}
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_TMA;
+ packet->payload = buf[2] | (buf[3] << 8);
+ packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+ return 7;
+}
+
static int intel_pt_get_pad(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_PAD;
return 1;
}
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MNT;
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11
+;
+}
+
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[2]) {
+ case 0x88: /* MNT */
+ return intel_pt_get_mnt(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -156,14 +227,22 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_long_tnt(buf, len, packet);
case 0x43: /* PIP */
return intel_pt_get_pip(buf, len, packet);
+ case 0x83: /* TraceStop */
+ return intel_pt_get_tracestop(packet);
case 0x03: /* CBR */
return intel_pt_get_cbr(buf, len, packet);
+ case 0xc8: /* VMCS */
+ return intel_pt_get_vmcs(buf, len, packet);
case 0xf3: /* OVF */
return intel_pt_get_ovf(packet);
case 0x82: /* PSB */
return intel_pt_get_psb(buf, len, packet);
case 0x23: /* PSBEND */
return intel_pt_get_psbend(packet);
+ case 0x73: /* TMA */
+ return intel_pt_get_tma(buf, len, packet);
+ case 0xC3: /* 3-byte header */
+ return intel_pt_get_3byte(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -187,6 +266,28 @@ static int intel_pt_get_short_tnt(unsigned int byte,
return 1;
}
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+ size_t len, struct intel_pt_pkt *packet)
+{
+ unsigned int offs = 1, shift;
+ uint64_t payload = byte >> 3;
+
+ byte >>= 2;
+ len -= 1;
+ for (shift = 5; byte & 1; shift += 7) {
+ if (offs > 9)
+ return INTEL_PT_BAD_PACKET;
+ if (len < offs)
+ return INTEL_PT_NEED_MORE_BYTES;
+ byte = buf[offs++];
+ payload |= (byte >> 1) << shift;
+ }
+
+ packet->type = INTEL_PT_CYC;
+ packet->payload = payload;
+ return offs;
+}
+
static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
@@ -269,6 +370,16 @@ static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
return 8;
}
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MTC;
+ packet->payload = buf[1];
+ return 2;
+}
+
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -288,6 +399,9 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
return intel_pt_get_short_tnt(byte, packet);
}
+ if ((byte & 2))
+ return intel_pt_get_cyc(byte, buf, len, packet);
+
switch (byte & 0x1f) {
case 0x0D:
return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
@@ -305,6 +419,8 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
return intel_pt_get_mode(buf, len, packet);
case 0x19:
return intel_pt_get_tsc(buf, len, packet);
+ case 0x59:
+ return intel_pt_get_mtc(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -329,7 +445,7 @@ int intel_pt_get_packet(const unsigned char *buf, size_t len,
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
size_t buf_len)
{
- int ret, i;
+ int ret, i, nr;
unsigned long long payload = packet->payload;
const char *name = intel_pt_pkt_name(packet->type);
@@ -338,6 +454,7 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
case INTEL_PT_PAD:
case INTEL_PT_PSB:
case INTEL_PT_PSBEND:
+ case INTEL_PT_TRACESTOP:
case INTEL_PT_OVF:
return snprintf(buf, buf_len, "%s", name);
case INTEL_PT_TNT: {
@@ -371,17 +488,16 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
case INTEL_PT_FUP:
if (!(packet->count))
return snprintf(buf, buf_len, "%s no ip", name);
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MTC:
+ case INTEL_PT_MNT:
case INTEL_PT_CBR:
- return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
case INTEL_PT_TSC:
- if (packet->count)
- return snprintf(buf, buf_len,
- "%s 0x%llx CTC 0x%x FC 0x%x",
- name, payload, packet->count & 0xffff,
- (packet->count >> 16) & 0x1ff);
- else
- return snprintf(buf, buf_len, "%s 0x%llx",
- name, payload);
+ return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+ case INTEL_PT_TMA:
+ return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+ (unsigned)payload, packet->count);
case INTEL_PT_MODE_EXEC:
return snprintf(buf, buf_len, "%s %lld", name, payload);
case INTEL_PT_MODE_TSX:
@@ -389,8 +505,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
name, (unsigned)(payload >> 1) & 1,
(unsigned)payload & 1);
case INTEL_PT_PIP:
- ret = snprintf(buf, buf_len, "%s 0x%llx",
- name, payload);
+ nr = packet->payload & NR_FLAG ? 1 : 0;
+ payload &= ~NR_FLAG;
+ ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+ name, payload, nr);
return ret;
default:
break;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 53404fa942b3..781bb79883bd 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -37,15 +37,21 @@ enum intel_pt_pkt_type {
INTEL_PT_TIP_PGD,
INTEL_PT_TIP_PGE,
INTEL_PT_TSC,
+ INTEL_PT_TMA,
INTEL_PT_MODE_EXEC,
INTEL_PT_MODE_TSX,
+ INTEL_PT_MTC,
INTEL_PT_TIP,
INTEL_PT_FUP,
+ INTEL_PT_CYC,
+ INTEL_PT_VMCS,
INTEL_PT_PSB,
INTEL_PT_PSBEND,
INTEL_PT_CBR,
+ INTEL_PT_TRACESTOP,
INTEL_PT_PIP,
INTEL_PT_OVF,
+ INTEL_PT_MNT,
};
struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index a5acd2fe2447..bb41c20e6005 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -91,6 +91,11 @@ struct intel_pt {
bool synth_needs_swap;
u64 tsc_bit;
+ u64 mtc_bit;
+ u64 mtc_freq_bits;
+ u32 tsc_ctc_ratio_n;
+ u32 tsc_ctc_ratio_d;
+ u64 cyc_bit;
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;
};
@@ -126,6 +131,7 @@ struct intel_pt_queue {
u64 timestamp;
u32 flags;
u16 insn_len;
+ u64 last_insn_cnt;
};
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@@ -567,6 +573,25 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
return true;
}
+static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ unsigned int shift;
+ u64 config;
+
+ if (!pt->mtc_freq_bits)
+ return 0;
+
+ for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
+ config >>= 1;
+
+ evlist__for_each(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config))
+ return (config & pt->mtc_freq_bits) >> shift;
+ }
+ return 0;
+}
+
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
struct perf_evsel *evsel;
@@ -667,6 +692,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
+ params.mtc_period = intel_pt_mtc_period(pt);
+ params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
+ params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
if (pt->synth_opts.instructions) {
if (pt->synth_opts.period) {
@@ -920,11 +948,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.addr = ptq->state->to_ip;
sample.id = ptq->pt->instructions_id;
sample.stream_id = ptq->pt->instructions_id;
- sample.period = ptq->pt->instructions_sample_period;
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
+ ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
+
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
pt->synth_opts.callchain_sz, sample.ip);
@@ -1748,16 +1778,20 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
}
static const char * const intel_pt_info_fmts[] = {
- [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
- [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
- [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
- [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
- [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
- [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
- [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
- [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
- [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
- [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
+ [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+ [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
+ [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
+ [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
+ [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
+ [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+ [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
+ [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
+ [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
+ [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
+ [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
};
static void intel_pt_print_info(u64 *arr, int start, int finish)
@@ -1809,6 +1843,17 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
INTEL_PT_PER_CPU_MMAPS);
+ if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
+ (sizeof(u64) * INTEL_PT_CYC_BIT)) {
+ pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+ pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+ pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
+ pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
+ pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
+ INTEL_PT_CYC_BIT);
+ }
+
pt->timeless_decoding = intel_pt_timeless_decoding(pt);
pt->have_tsc = intel_pt_have_tsc(pt);
pt->sampling_mode = false;
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
index a1bfe93473ba..0065949df693 100644
--- a/tools/perf/util/intel-pt.h
+++ b/tools/perf/util/intel-pt.h
@@ -29,6 +29,11 @@ enum {
INTEL_PT_HAVE_SCHED_SWITCH,
INTEL_PT_SNAPSHOT_MODE,
INTEL_PT_PER_CPU_MMAPS,
+ INTEL_PT_MTC_BIT,
+ INTEL_PT_MTC_FREQ_BITS,
+ INTEL_PT_TSC_CTC_N,
+ INTEL_PT_TSC_CTC_D,
+ INTEL_PT_CYC_BIT,
INTEL_PT_AUXTRACE_PRIV_MAX,
};
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 52be201b9b25..b1b9e2385f4b 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe)
else if (last_ts <= limit)
oe->last = list_entry(head->prev, struct ordered_event, list);
+ if (show_progress)
+ ui_progress__finish();
+
return 0;
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index f07374bc9c5a..eb5f18b75402 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -515,7 +515,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
if (ret < 0)
goto error;
addr += stext;
- } else {
+ } else if (tp->symbol) {
addr = kernel_get_symbol_address_by_name(tp->symbol, false);
if (addr == 0)
goto error;
@@ -1194,15 +1194,37 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
*ptr++ = '\0';
}
- tmp = strdup(arg);
- if (tmp == NULL)
- return -ENOMEM;
+ if (arg[0] == '\0')
+ tmp = NULL;
+ else {
+ tmp = strdup(arg);
+ if (tmp == NULL)
+ return -ENOMEM;
+ }
if (file_spec)
pp->file = tmp;
- else
+ else {
pp->function = tmp;
+ /*
+ * Keep pp->function even if this is absolute address,
+ * so it can mark whether abs_address is valid.
+ * Which make 'perf probe lib.bin 0x0' possible.
+ *
+ * Note that checking length of tmp is not needed
+ * because when we access tmp[1] we know tmp[0] is '0',
+ * so tmp[1] should always valid (but could be '\0').
+ */
+ if (tmp && !strncmp(tmp, "0x", 2)) {
+ pp->abs_address = strtoul(pp->function, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("Invalid absolute address.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
/* Parse other options */
while (ptr) {
arg = ptr;
@@ -1519,9 +1541,31 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
} else
p = argv[1];
fmt1_str = strtok_r(p, "+", &fmt);
- if (fmt1_str[0] == '0') /* only the address started with 0x */
- tp->address = strtoul(fmt1_str, NULL, 0);
- else {
+ /* only the address started with 0x */
+ if (fmt1_str[0] == '0') {
+ /*
+ * Fix a special case:
+ * if address == 0, kernel reports something like:
+ * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax
+ * Newer kernel may fix that, but we want to
+ * support old kernel also.
+ */
+ if (strcmp(fmt1_str, "0x") == 0) {
+ if (!argv[2] || strcmp(argv[2], "(null)")) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tp->address = 0;
+
+ free(argv[2]);
+ for (i = 2; argv[i + 1] != NULL; i++)
+ argv[i] = argv[i + 1];
+
+ argv[i] = NULL;
+ argc -= 1;
+ } else
+ tp->address = strtoul(fmt1_str, NULL, 0);
+ } else {
/* Only the symbol-based probe has offset */
tp->symbol = strdup(fmt1_str);
if (tp->symbol == NULL) {
@@ -1778,14 +1822,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
if (len <= 0)
goto error;
- /* Uprobes must have tp->address and tp->module */
- if (tev->uprobes && (!tp->address || !tp->module))
+ /* Uprobes must have tp->module */
+ if (tev->uprobes && !tp->module)
goto error;
+ /*
+ * If tp->address == 0, then this point must be a
+ * absolute address uprobe.
+ * try_to_find_absolute_address() should have made
+ * tp->symbol to "0x0".
+ */
+ if (tev->uprobes && !tp->address) {
+ if (!tp->symbol || strcmp(tp->symbol, "0x0"))
+ goto error;
+ }
/* Use the tp->address for uprobes */
if (tev->uprobes)
ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx",
tp->module, tp->address);
+ else if (!strncmp(tp->symbol, "0x", 2))
+ /* Absolute address. See try_to_find_absolute_address() */
+ ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx",
+ tp->module ?: "", tp->module ? ":" : "",
+ tp->address);
else
ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu",
tp->module ?: "", tp->module ? ":" : "",
@@ -1815,17 +1874,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
{
struct symbol *sym = NULL;
struct map *map;
- u64 addr;
+ u64 addr = tp->address;
int ret = -ENOENT;
if (!is_kprobe) {
map = dso__new_map(tp->module);
if (!map)
goto out;
- addr = tp->address;
sym = map__find_symbol(map, addr, NULL);
} else {
- addr = kernel_get_symbol_address_by_name(tp->symbol, true);
+ if (tp->symbol)
+ addr = kernel_get_symbol_address_by_name(tp->symbol, true);
if (addr) {
addr += tp->offset;
sym = __find_kernel_function(addr, &map);
@@ -1848,8 +1907,8 @@ out:
}
static int convert_to_perf_probe_point(struct probe_trace_point *tp,
- struct perf_probe_point *pp,
- bool is_kprobe)
+ struct perf_probe_point *pp,
+ bool is_kprobe)
{
char buf[128];
int ret;
@@ -1866,7 +1925,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp,
if (tp->symbol) {
pp->function = strdup(tp->symbol);
pp->offset = tp->offset;
- } else if (!tp->module && !is_kprobe) {
+ } else {
ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address);
if (ret < 0)
return ret;
@@ -2305,7 +2364,9 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
if (pev->event)
event = pev->event;
else
- if (pev->point.function && !strisglob(pev->point.function))
+ if (pev->point.function &&
+ (strncmp(pev->point.function, "0x", 2) != 0) &&
+ !strisglob(pev->point.function))
event = pev->point.function;
else
event = tev->point.realname;
@@ -2572,6 +2633,98 @@ err_out:
goto out;
}
+static int try_to_find_absolute_address(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct perf_probe_point *pp = &pev->point;
+ struct probe_trace_event *tev;
+ struct probe_trace_point *tp;
+ int i, err;
+
+ if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
+ return -EINVAL;
+ if (perf_probe_event_need_dwarf(pev))
+ return -EINVAL;
+
+ /*
+ * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
+ * absolute address.
+ *
+ * Only one tev can be generated by this.
+ */
+ *tevs = zalloc(sizeof(*tev));
+ if (!*tevs)
+ return -ENOMEM;
+
+ tev = *tevs;
+ tp = &tev->point;
+
+ /*
+ * Don't use tp->offset, use address directly, because
+ * in synthesize_probe_trace_command() address cannot be
+ * zero.
+ */
+ tp->address = pev->point.abs_address;
+ tp->retprobe = pp->retprobe;
+ tev->uprobes = pev->uprobes;
+
+ err = -ENOMEM;
+ /*
+ * Give it a '0x' leading symbol name.
+ * In __add_probe_trace_events, a NULL symbol is interpreted as
+ * invalud.
+ */
+ if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0)
+ goto errout;
+
+ /* For kprobe, check range */
+ if ((!tev->uprobes) &&
+ (kprobe_warn_out_range(tev->point.symbol,
+ tev->point.address))) {
+ err = -EACCES;
+ goto errout;
+ }
+
+ if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0)
+ goto errout;
+
+ if (pev->target) {
+ tp->module = strdup(pev->target);
+ if (!tp->module)
+ goto errout;
+ }
+
+ if (tev->group) {
+ tev->group = strdup(pev->group);
+ if (!tev->group)
+ goto errout;
+ }
+
+ if (pev->event) {
+ tev->event = strdup(pev->event);
+ if (!tev->event)
+ goto errout;
+ }
+
+ tev->nargs = pev->nargs;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (!tev->args) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ for (i = 0; i < tev->nargs; i++)
+ copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
+
+ return 1;
+
+errout:
+ if (*tevs) {
+ clear_probe_trace_events(*tevs, 1);
+ *tevs = NULL;
+ }
+ return err;
+}
+
bool __weak arch__prefers_symtab(void) { return false; }
static int convert_to_probe_trace_events(struct perf_probe_event *pev,
@@ -2588,6 +2741,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
}
}
+ ret = try_to_find_absolute_address(pev, tevs);
+ if (ret > 0)
+ return ret;
+
if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
ret = find_probe_trace_events_from_map(pev, tevs);
if (ret > 0)
@@ -2758,3 +2915,22 @@ end:
return ret;
}
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar)
+{
+ tvar->value = strdup(pvar->var);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+ if (pvar->type) {
+ tvar->type = strdup(pvar->type);
+ if (tvar->type == NULL)
+ return -ENOMEM;
+ }
+ if (pvar->name) {
+ tvar->name = strdup(pvar->name);
+ if (tvar->name == NULL)
+ return -ENOMEM;
+ } else
+ tvar->name = NULL;
+ return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 83ee95e9743b..6e7ec68a4aa8 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -59,6 +59,7 @@ struct perf_probe_point {
bool retprobe; /* Return probe flag */
char *lazy_line; /* Lazy matching pattern */
unsigned long offset; /* Offset from function entry */
+ unsigned long abs_address; /* Absolute address of the point */
};
/* Perf probe probing argument field chain */
@@ -156,4 +157,7 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar);
+
#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 7b80f8cb62b9..29c43c0680a8 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -553,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
char buf[32], *ptr;
int ret = 0;
- if (!is_c_varname(pf->pvar->var)) {
- /* Copy raw parameters */
- pf->tvar->value = strdup(pf->pvar->var);
- if (pf->tvar->value == NULL)
- return -ENOMEM;
- if (pf->pvar->type) {
- pf->tvar->type = strdup(pf->pvar->type);
- if (pf->tvar->type == NULL)
- return -ENOMEM;
- }
- if (pf->pvar->name) {
- pf->tvar->name = strdup(pf->pvar->name);
- if (pf->tvar->name == NULL)
- return -ENOMEM;
- } else
- pf->tvar->name = NULL;
- return 0;
- }
+ /* Copy raw parameters */
+ if (!is_c_varname(pf->pvar->var))
+ return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
if (pf->pvar->name)
pf->tvar->name = strdup(pf->pvar->name);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 42e98ab5a9bb..46ae0532a8a6 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -441,6 +441,16 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
return &s->sym;
}
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+ enum map_type type;
+
+ for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
+ dso->last_find_result[type].addr = 0;
+ dso->last_find_result[type].symbol = NULL;
+ }
+}
+
struct symbol *dso__find_symbol(struct dso *dso,
enum map_type type, u64 addr)
{