author     Linus Torvalds <torvalds@linux-foundation.org>	2024-01-08 19:37:20 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>	2024-01-08 19:37:20 -0800
commit     aac4de465af08ccec90ef47bdcc13435e48a7223 (patch)
tree       38d00205e7f4a84943fa9954ffe1ad04e603e5a6
parent     0bdf0621f89f87858ca26344378188eff194eddd (diff)
parent     fdd041028f2294228e10610b4fca6a1a83ac683d (diff)
Merge tag 'perf-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:
- Add branch stack counters ABI extension to better capture the growing
amount of information the PMU exposes via branch stack sampling.
There's matching tooling support.
- Fix race when creating the nr_addr_filters sysfs file
- Add Intel Sierra Forest and Grand Ridge intel/cstate PMU support
- Add Intel Granite Rapids, Sierra Forest and Grand Ridge uncore PMU
support
- Misc cleanups & fixes
* tag 'perf-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel/uncore: Factor out topology_gidnid_map()
perf/x86/intel/uncore: Fix NULL pointer dereference issue in upi_fill_topology()
perf/x86/amd: Reject branch stack for IBS events
perf/x86/intel/uncore: Support Sierra Forest and Grand Ridge
perf/x86/intel/uncore: Support IIO free-running counters on GNR
perf/x86/intel/uncore: Support Granite Rapids
perf/x86/uncore: Use u64 to replace unsigned for the uncore offsets array
perf/x86/intel/uncore: Generic uncore_get_uncores and MMIO format of SPR
perf: Fix the nr_addr_filters fix
perf/x86/intel/cstate: Add Grand Ridge support
perf/x86/intel/cstate: Add Sierra Forest support
x86/smp: Export symbol cpu_clustergroup_mask()
perf/x86/intel/cstate: Cleanup duplicate attr_groups
perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file
perf/x86/intel: Support branch counters logging
perf/x86/intel: Reorganize attrs and is_visible
perf: Add branch_sample_call_stack
perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag
perf: Add branch stack counters
23 files changed, 625 insertions, 128 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
index 8757dcf41c08..a5f506f7d481 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
@@ -16,3 +16,9 @@ Description:
 		Example output in powerpc:
 		grep . /sys/bus/event_source/devices/cpu/caps/*
 		/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
+
+		The "branch_counter_nr" on a supported platform exposes the
+		maximum number of counters which can be shown in the u64 counters
+		of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
+		exposes the width of each counter. Both of them can be used by
+		the perf tool to parse the logged counters in each branch.
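The two caps above give user space everything needed to split the per-branch u64 delivered by PERF_SAMPLE_BRANCH_COUNTERS into individual counts. A minimal sketch of such a consumer follows; the sysfs paths come from the ABI text above, while the sample counters value is purely hypothetical:

    #include <stdio.h>
    #include <stdint.h>

    /* Read one integer attribute from the cpu PMU caps directory. */
    static int read_cap(const char *name)
    {
    	char path[256];
    	FILE *f;
    	int val = 0;

    	snprintf(path, sizeof(path),
    		 "/sys/bus/event_source/devices/cpu/caps/%s", name);
    	f = fopen(path, "r");
    	if (!f)
    		return 0;	/* cap absent: feature unsupported */
    	if (fscanf(f, "%d", &val) != 1)
    		val = 0;
    	fclose(f);
    	return val;
    }

    int main(void)
    {
    	int nr = read_cap("branch_counter_nr");
    	int width = read_cap("branch_counter_width");
    	uint64_t mask = (1ULL << width) - 1;
    	uint64_t counters = 0x9;	/* hypothetical cntr value for one branch */

    	/* counter i occupies bits [i*width, (i+1)*width) of the u64 */
    	for (int i = 0; i < nr; i++)
    		printf("counter %d: %llu\n", i,
    		       (unsigned long long)((counters >> (i * width)) & mask));
    	return 0;
    }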
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 10b946e9c6e7..b7ff680cde96 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2312,7 +2312,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			struct cpu_hw_events *cpuhw;
 			cpuhw = this_cpu_ptr(&cpu_hw_events);
 			power_pmu_bhrb_read(event, cpuhw);
-			perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+			perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
 		}

 		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 25ad6fd98166..81f6d8275b6b 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 			continue;

 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 6911c5399d02..e91970b01d62 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -287,6 +287,9 @@ static int perf_ibs_init(struct perf_event *event)
 	if (config & ~perf_ibs->config_mask)
 		return -EINVAL;

+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	ret = validate_group(event);
 	if (ret)
 		return ret;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 40ad1425ffa2..09050641ce5d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -601,7 +601,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		}
 	}

-	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+	if (branch_sample_call_stack(event))
 		event->attach_state |= PERF_ATTACH_TASK_DATA;

 	/*
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 		perf_sample_data_init(&data, 0, event->hw.last_period);

 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d50b3e066e89..3804f21ab049 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
 		perf_report_aux_output_id(event, idx);
 }

+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+	return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
-	if (needs_branch_stack(event))
+	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_del(event);
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_del(event);
@@ -2787,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)

 static void intel_pmu_enable_event(struct perf_event *event)
 {
+	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

@@ -2795,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)

 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
+		if (branch_sample_counters(event))
+			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
-		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -2820,7 +2828,7 @@ static void intel_pmu_add_event(struct perf_event *event)
 {
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_add(event);
-	if (needs_branch_stack(event))
+	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_add(event);
 }

@@ -3047,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		perf_sample_data_init(&data, 0, event->hw.last_period);

 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			intel_pmu_lbr_save_brstack(&data, cpuc, event);

 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
@@ -3612,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	if (cpuc->excl_cntrs)
 		return intel_get_excl_constraints(cpuc, event, idx, c2);

+	/* Not all counters support the branch counter feature. */
+	if (branch_sample_counters(event)) {
+		c2 = dyn_constraint(cpuc, c2, idx);
+		c2->idxmsk64 &= x86_pmu.lbr_counters;
+		c2->weight = hweight64(c2->idxmsk64);
+	}
+
 	return c2;
 }
@@ -3897,7 +3912,62 @@ static int intel_pmu_hw_config(struct perf_event *event)
 			x86_pmu.pebs_aliases(event);
 	}

-	if (needs_branch_stack(event)) {
+	if (needs_branch_stack(event) && is_sampling_event(event))
+		event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+	if (branch_sample_counters(event)) {
+		struct perf_event *leader, *sibling;
+		int num = 0;
+
+		if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+		    (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+			return -EINVAL;
+
+		/*
+		 * The branch counter logging is not supported in the call stack
+		 * mode yet, since we cannot simply flush the LBR during e.g.,
+		 * multiplexing. Also, there is no obvious usage with the call
+		 * stack mode. Simply forbid it for now.
+		 *
+		 * If any events in the group enable the branch counter logging
+		 * feature, the group is treated as a branch counter logging
+		 * group, which requires the extra space to store the counters.
+		 */
+		leader = event->group_leader;
+		if (branch_sample_call_stack(leader))
+			return -EINVAL;
+		if (branch_sample_counters(leader))
+			num++;
+		leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+		for_each_sibling_event(sibling, leader) {
+			if (branch_sample_call_stack(sibling))
+				return -EINVAL;
+			if (branch_sample_counters(sibling))
+				num++;
+		}
+
+		if (num > fls(x86_pmu.lbr_counters))
+			return -EINVAL;
+		/*
+		 * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't
+		 * require any branch stack setup.
+		 * Clear the bit to avoid unnecessary branch stack setup.
+		 */
+		if (0 == (event->attr.branch_sample_type &
+			  ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+			    PERF_SAMPLE_BRANCH_COUNTERS)))
+			event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+		/*
+		 * Force the leader to be a LBR event. So LBRs can be reset
+		 * with the leader event. See intel_pmu_lbr_del() for details.
+		 */
+		if (!intel_pmu_needs_branch_stack(leader))
+			return -EINVAL;
+	}
+
+	if (intel_pmu_needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
 			return ret;
@@ -4380,8 +4450,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	 */
 	if (event->attr.precise_ip == 3) {
 		/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
-		if (constraint_match(&fixed0_constraint, event->hw.config))
-			return &fixed0_counter0_1_constraint;
+		if (constraint_match(&fixed0_constraint, event->hw.config)) {
+			/* The fixed counter 0 doesn't support LBR event logging. */
+			if (branch_sample_counters(event))
+				return &counter0_1_constraint;
+			else
+				return &fixed0_counter0_1_constraint;
+		}

 		switch (c->idxmsk64 & 0x3ull) {
 		case 0x1:
@@ -4560,7 +4635,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 			goto err;
 	}

-	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);

 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5532,11 +5607,41 @@ static ssize_t branches_show(struct device *cdev,

 static DEVICE_ATTR_RO(branches);

+static ssize_t branch_counter_nr_show(struct device *cdev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
 static struct attribute *lbr_attrs[] = {
 	&dev_attr_branches.attr,
+	&dev_attr_branch_counter_nr.attr,
+	&dev_attr_branch_counter_width.attr,
 	NULL
 };

+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	/* branches */
+	if (i == 0)
+		return x86_pmu.lbr_nr ? attr->mode : 0;
+
+	return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
+}
+
 static char pmu_name_str[30];

 static ssize_t pmu_name_show(struct device *cdev,
@@ -5564,6 +5669,15 @@ static struct attribute *intel_pmu_attrs[] = {
 };

 static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	if (attr == &dev_attr_allow_tsx_force_abort.attr)
+		return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+	return attr->mode;
+}
+
+static umode_t
 tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
 	return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
@@ -5585,26 +5699,11 @@ mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 }

 static umode_t
-lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
-	return x86_pmu.lbr_nr ? attr->mode : 0;
-}
-
-static umode_t
 exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
 	return x86_pmu.version >= 2 ? attr->mode : 0;
 }

-static umode_t
-default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
-	if (attr == &dev_attr_allow_tsx_force_abort.attr)
-		return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
-
-	return attr->mode;
-}
-
 static struct attribute_group group_events_td  = {
 	.name = "events",
 };
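The validation above implies a calling convention for user space: the group leader must itself be an LBR sampling event, branch counter logging cannot be combined with call-stack mode, and every event whose occurrences should be logged sets the new PERF_SAMPLE_BRANCH_COUNTERS bit. A sketch of such a group; the generic event encodings are illustrative only and not taken from this series:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /*
     * Open a branch counter logging group. The leader samples the branch
     * stack; the sibling is a plain counting event whose per-branch
     * occurrences land in the cntr[] space of the leader's samples.
     */
    static int open_branch_counter_group(void)
    {
    	struct perf_event_attr lead = { 0 }, sib = { 0 };
    	int lead_fd, sib_fd;

    	lead.size = sizeof(lead);
    	lead.type = PERF_TYPE_HARDWARE;
    	lead.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;	/* illustrative */
    	lead.sample_period = 100000;
    	lead.sample_type = PERF_SAMPLE_BRANCH_STACK;
    	lead.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
    				  PERF_SAMPLE_BRANCH_COUNTERS;

    	lead_fd = syscall(__NR_perf_event_open, &lead, 0, -1, -1, 0);
    	if (lead_fd < 0)
    		return -1;

    	sib.size = sizeof(sib);
    	sib.type = PERF_TYPE_HARDWARE;
    	sib.config = PERF_COUNT_HW_BRANCH_MISSES;		/* illustrative */
    	sib.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;

    	sib_fd = syscall(__NR_perf_event_open, &sib, 0, -1, lead_fd, 0);
    	return sib_fd < 0 ? -1 : lead_fd;
    }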
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index cbeb6d2bf5b4..4b50a3a9818a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -41,7 +41,7 @@
  * MSR_CORE_C1_RES: CORE C1 Residency Counter
  *		    perf code: 0x00
  *		    Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL,
- *				     MTL
+ *				     MTL,SRF,GRR
  *		    Scope: Core (each processor core has a MSR)
  * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			  perf code: 0x01
@@ -52,7 +52,8 @@
  *			  perf code: 0x02
  *			  Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
  *					   SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- *					   TGL,TNT,RKL,ADL,RPL,SPR,MTL
+ *					   TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
+ *					   GRR
  *			  Scope: Core
  * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			  perf code: 0x03
@@ -75,7 +76,7 @@
  *			  perf code: 0x02
  *			  Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
  *					   SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- *					   TGL,TNT,RKL,ADL,RPL,SPR,MTL
+ *					   TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF
  *			  Scope: Package (physical package)
  * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
  *			 perf code: 0x03
@@ -97,6 +98,10 @@
  *			 Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
  *					  TNT,RKL,ADL,RPL,MTL
  *			 Scope: Package (physical package)
+ * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
+ *			 perf code: 0x00
+ *			 Available model: SRF,GRR
+ *			 Scope: A cluster of cores sharing an L2 cache
  *
  */
@@ -130,6 +135,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 struct cstate_model {
 	unsigned long		core_events;
 	unsigned long		pkg_events;
+	unsigned long		module_events;
 	unsigned long		quirks;
 };
@@ -189,20 +195,20 @@ static struct attribute *attrs_empty[] = {
  * "events" group (with empty attrs) before updating
  * it with detected events.
  */
-static struct attribute_group core_events_attr_group = {
+static struct attribute_group cstate_events_attr_group = {
 	.name = "events",
 	.attrs = attrs_empty,
 };

-DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
-static struct attribute *core_format_attrs[] = {
-	&format_attr_core_event.attr,
+DEFINE_CSTATE_FORMAT_ATTR(cstate_event, event, "config:0-63");
+static struct attribute *cstate_format_attrs[] = {
+	&format_attr_cstate_event.attr,
 	NULL,
 };

-static struct attribute_group core_format_attr_group = {
+static struct attribute_group cstate_format_attr_group = {
 	.name = "format",
-	.attrs = core_format_attrs,
+	.attrs = cstate_format_attrs,
 };

 static cpumask_t cstate_core_cpu_mask;
@@ -217,9 +223,9 @@ static struct attribute_group cpumask_attr_group = {
 	.attrs = cstate_cpumask_attrs,
 };

-static const struct attribute_group *core_attr_groups[] = {
-	&core_events_attr_group,
-	&core_format_attr_group,
+static const struct attribute_group *cstate_attr_groups[] = {
+	&cstate_events_attr_group,
+	&cstate_format_attr_group,
 	&cpumask_attr_group,
 	NULL,
 };
@@ -268,30 +274,30 @@ static struct perf_msr pkg_msr[] = {
 	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&group_cstate_pkg_c10,	test_msr },
 };

-static struct attribute_group pkg_events_attr_group = {
-	.name = "events",
-	.attrs = attrs_empty,
-};
+static cpumask_t cstate_pkg_cpu_mask;

-DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
-static struct attribute *pkg_format_attrs[] = {
-	&format_attr_pkg_event.attr,
-	NULL,
-};
-static struct attribute_group pkg_format_attr_group = {
-	.name = "format",
-	.attrs = pkg_format_attrs,
+/* cstate_module PMU */
+static struct pmu cstate_module_pmu;
+static bool has_cstate_module;
+
+enum perf_cstate_module_events {
+	PERF_CSTATE_MODULE_C6_RES = 0,
+
+	PERF_CSTATE_MODULE_EVENT_MAX,
 };

-static cpumask_t cstate_pkg_cpu_mask;
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_module_c6, "event=0x00");

-static const struct attribute_group *pkg_attr_groups[] = {
-	&pkg_events_attr_group,
-	&pkg_format_attr_group,
-	&cpumask_attr_group,
-	NULL,
+static unsigned long module_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_module_c6);
+
+static struct perf_msr module_msr[] = {
+	[PERF_CSTATE_MODULE_C6_RES]  = { MSR_MODULE_C6_RES_MS,	&group_cstate_module_c6,	test_msr },
 };

+static cpumask_t cstate_module_cpu_mask;
+
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
 				       struct device_attribute *attr,
 				       char *buf)
@@ -302,6 +308,8 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 		return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
 	else if (pmu == &cstate_pkg_pmu)
 		return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+	else if (pmu == &cstate_module_pmu)
+		return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
 	else
 		return 0;
 }
@@ -342,6 +350,15 @@ static int cstate_pmu_event_init(struct perf_event *event)
 		event->hw.event_base = pkg_msr[cfg].msr;
 		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
 				      topology_die_cpumask(event->cpu));
+	} else if (event->pmu == &cstate_module_pmu) {
+		if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
+			return -EINVAL;
+		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_MODULE_EVENT_MAX);
+		if (!(module_msr_mask & (1 << cfg)))
+			return -EINVAL;
+		event->hw.event_base = module_msr[cfg].msr;
+		cpu = cpumask_any_and(&cstate_module_cpu_mask,
+				      topology_cluster_cpumask(event->cpu));
 	} else {
 		return -ENOENT;
 	}
@@ -429,6 +446,17 @@ static int cstate_cpu_exit(unsigned int cpu)
 			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
 		}
 	}
+
+	if (has_cstate_module &&
+	    cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
+
+		target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
+			cpumask_set_cpu(target, &cstate_module_cpu_mask);
+			perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
+		}
+	}
 	return 0;
 }
@@ -455,6 +483,15 @@ static int cstate_cpu_init(unsigned int cpu)
 	if (has_cstate_pkg && target >= nr_cpu_ids)
 		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);

+	/*
+	 * If this is the first online thread of that cluster, set it
+	 * in the cluster cpu mask as the designated reader.
+	 */
+	target = cpumask_any_and(&cstate_module_cpu_mask,
+				 topology_cluster_cpumask(cpu));
+	if (has_cstate_module && target >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
+
 	return 0;
 }
@@ -477,8 +514,13 @@ static const struct attribute_group *pkg_attr_update[] = {
 	NULL,
 };

+static const struct attribute_group *module_attr_update[] = {
+	&group_cstate_module_c6,
+	NULL
+};
+
 static struct pmu cstate_core_pmu = {
-	.attr_groups	= core_attr_groups,
+	.attr_groups	= cstate_attr_groups,
 	.attr_update	= core_attr_update,
 	.name		= "cstate_core",
 	.task_ctx_nr	= perf_invalid_context,
@@ -493,7 +535,7 @@ static struct pmu cstate_core_pmu = {
 };

 static struct pmu cstate_pkg_pmu = {
-	.attr_groups	= pkg_attr_groups,
+	.attr_groups	= cstate_attr_groups,
 	.attr_update	= pkg_attr_update,
 	.name		= "cstate_pkg",
 	.task_ctx_nr	= perf_invalid_context,
@@ -507,6 +549,21 @@ static struct pmu cstate_pkg_pmu = {
 	.module		= THIS_MODULE,
 };

+static struct pmu cstate_module_pmu = {
+	.attr_groups	= cstate_attr_groups,
+	.attr_update	= module_attr_update,
+	.name		= "cstate_module",
+	.task_ctx_nr	= perf_invalid_context,
+	.event_init	= cstate_pmu_event_init,
+	.add		= cstate_pmu_event_add,
+	.del		= cstate_pmu_event_del,
+	.start		= cstate_pmu_event_start,
+	.stop		= cstate_pmu_event_stop,
+	.read		= cstate_pmu_event_update,
+	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+	.module		= THIS_MODULE,
+};
+
 static const struct cstate_model nhm_cstates __initconst = {
 	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
 				  BIT(PERF_CSTATE_CORE_C6_RES),
@@ -621,6 +678,22 @@ static const struct cstate_model glm_cstates __initconst = {
 				  BIT(PERF_CSTATE_PKG_C10_RES),
 };

+static const struct cstate_model grr_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.module_events		= BIT(PERF_CSTATE_MODULE_C6_RES),
+};
+
+static const struct cstate_model srf_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
+
+	.module_events		= BIT(PERF_CSTATE_MODULE_C6_RES),
+};
+
 static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_cstates),
@@ -673,6 +746,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,	&glm_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	&glm_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,	&srf_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT,	&grr_cstates),

 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_cstates),
@@ -714,10 +789,14 @@ static int __init cstate_probe(const struct cstate_model *cm)
 	pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
 				      true, (void *) &cm->pkg_events);

+	module_msr_mask = perf_msr_probe(module_msr, PERF_CSTATE_MODULE_EVENT_MAX,
+					 true, (void *) &cm->module_events);
+
 	has_cstate_core = !!core_msr_mask;
 	has_cstate_pkg  = !!pkg_msr_mask;
+	has_cstate_module = !!module_msr_mask;

-	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+	return (has_cstate_core || has_cstate_pkg || has_cstate_module) ? 0 : -ENODEV;
 }

 static inline void cstate_cleanup(void)
@@ -730,6 +809,9 @@ static inline void cstate_cleanup(void)

 	if (has_cstate_pkg)
 		perf_pmu_unregister(&cstate_pkg_pmu);
+
+	if (has_cstate_module)
+		perf_pmu_unregister(&cstate_module_pmu);
 }

 static int __init cstate_init(void)
@@ -766,6 +848,16 @@ static int __init cstate_init(void)
 			return err;
 		}
 	}
+
+	if (has_cstate_module) {
+		err = perf_pmu_register(&cstate_module_pmu, cstate_module_pmu.name, -1);
+		if (err) {
+			has_cstate_module = false;
+			pr_info("Failed to register cstate cluster pmu\n");
+			cstate_cleanup();
+			return err;
+		}
+	}
 	return 0;
 }
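Once the cstate_module PMU is registered, its counter reads like any other system-wide PMU event. A sketch of a reader, assuming the PMU's dynamic type id is taken from sysfs and that config 0x00 follows the "event=0x00" string above (requires suitable perf_event_paranoid settings):

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>

    int main(void)
    {
    	struct perf_event_attr attr = { 0 };
    	uint64_t count;
    	FILE *f;
    	int fd, type;

    	/* The dynamic type id is assigned at registration time. */
    	f = fopen("/sys/bus/event_source/devices/cstate_module/type", "r");
    	if (!f || fscanf(f, "%d", &type) != 1)
    		return 1;
    	fclose(f);

    	attr.size = sizeof(attr);
    	attr.type = type;
    	attr.config = 0x00;	/* c6-residency, per the event string */

    	/* cpu-wide event; CPU 0 stands in for any CPU of its cluster */
    	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
    	if (fd < 0)
    		return 1;
    	sleep(1);
    	if (read(fd, &count, sizeof(count)) != sizeof(count))
    		return 1;
    	printf("module C6 residency: %llu\n", (unsigned long long)count);
    	return 0;
    }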
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index bf97ab904d40..d49d661ec0a7 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1755,7 +1755,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	setup_pebs_time(event, data, pebs->tsc);

 	if (has_branch_stack(event))
-		perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
 }

 static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,

 		if (has_branch_stack(event)) {
 			intel_pmu_store_pebs_lbrs(lbr);
-			perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+			intel_pmu_lbr_save_brstack(data, cpuc, event);
 		}
 	}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c3b0d15a9841..78cd5084104e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -676,6 +676,25 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
 	perf_sched_cb_dec(event->pmu);
+
+	/*
+	 * The logged occurrences information is only valid for the
+	 * current LBR group. If another LBR group is scheduled in
+	 * later, the information from the stale LBRs will be wrongly
+	 * interpreted. Reset the LBRs here.
+	 *
+	 * Only clear once for a branch counter group with the leader
+	 * event. Because
+	 * - Cannot simply reset the LBRs with the !cpuc->lbr_users.
+	 *   Because it's possible that the last LBR user is not in a
+	 *   branch counter group, e.g., a branch_counters group +
+	 *   several normal LBR events.
+	 * - The LBR reset can be done with any one of the events in a
+	 *   branch counter group, since they are always scheduled together.
+	 *   It's easy to force the leader event to be an LBR event.
+	 */
+	if (is_branch_counters_group(event) && event == event->group_leader)
+		intel_pmu_lbr_reset();
 }

 static inline bool vlbr_exclude_host(void)
@@ -866,6 +885,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
 	return cycles;
 }

+static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
+
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 				struct lbr_entry *entries)
 {
@@ -898,11 +919,67 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->abort	= !!(info & LBR_INFO_ABORT);
 		e->cycles	= get_lbr_cycles(info);
 		e->type		= get_lbr_br_type(info);
+
+		/*
+		 * Leverage the reserved field of cpuc->lbr_entries[i] to
+		 * temporarily store the branch counters information.
+		 * The later code will decide what content can be disclosed
+		 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
+		 */
+		e->reserved	= (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
 	}

 	cpuc->lbr_stack.nr = i;
 }

+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_counters with the enabled order.
+ */
+static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
+					   struct perf_event *event)
+{
+	int i, j, pos = 0, order[X86_PMC_IDX_MAX];
+	struct perf_event *leader, *sibling;
+	u64 src, dst, cnt;
+
+	leader = event->group_leader;
+	if (branch_sample_counters(leader))
+		order[pos++] = leader->hw.idx;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!branch_sample_counters(sibling))
+			continue;
+		order[pos++] = sibling->hw.idx;
+	}
+
+	WARN_ON_ONCE(!pos);
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+		src = cpuc->lbr_entries[i].reserved;
+		dst = 0;
+		for (j = 0; j < pos; j++) {
+			cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
+			dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
+		}
+		cpuc->lbr_counters[i] = dst;
+		cpuc->lbr_entries[i].reserved = 0;
+	}
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	if (is_branch_counters_group(event)) {
+		intel_pmu_lbr_counters_reorder(cpuc, event);
+		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
+		return;
+	}
+
+	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+}
+
 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
 {
 	intel_pmu_store_lbr(cpuc, NULL);
@@ -1173,8 +1250,10 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
 		if (!cpuc->lbr_entries[i].from) {
 			j = i;
-			while (++j < cpuc->lbr_stack.nr)
+			while (++j < cpuc->lbr_stack.nr) {
 				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+				cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
+			}
 			cpuc->lbr_stack.nr--;
 			if (!cpuc->lbr_entries[i].from)
 				continue;
@@ -1525,8 +1604,12 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
 	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+	x86_pmu.lbr_counters = ecx.split.lbr_counters;
 	x86_pmu.lbr_nr = lbr_nr;

+	if (!!x86_pmu.lbr_counters)
+		x86_pmu.flags |= PMU_FL_BR_CNTR;
+
 	if (x86_pmu.lbr_mispred)
 		static_branch_enable(&x86_lbr_mispred);
 	if (x86_pmu.lbr_timed_lbr)
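For reference, the per-entry arithmetic of intel_pmu_lbr_counters_reorder() can be isolated as below. The constants mirror the msr-index.h additions in this series; the function is a stand-alone restatement for illustration, not kernel code:

    #include <stdint.h>

    #define LBR_INFO_BR_CNTR_OFFSET	32
    #define LBR_INFO_BR_CNTR_NUM	4
    #define LBR_INFO_BR_CNTR_BITS	2
    #define LBR_INFO_BR_CNTR_MASK	((1ULL << LBR_INFO_BR_CNTR_BITS) - 1)

    /*
     * Given a raw LBR_INFO value and the hardware counter indices of the
     * group's events in enabled order, pack the per-event 2-bit counts
     * contiguously, as the kernel does for one cpuc->lbr_counters[] slot.
     */
    static uint64_t reorder_one(uint64_t info, const int *order, int nr_events)
    {
    	uint64_t src, dst = 0, cnt;

    	/* eight bits of counters live at bits 39:32 of LBR_INFO */
    	src = (info >> LBR_INFO_BR_CNTR_OFFSET) &
    	      ((1ULL << (LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS)) - 1);

    	for (int j = 0; j < nr_events; j++) {
    		cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) &
    		      LBR_INFO_BR_CNTR_MASK;
    		dst |= cnt << (j * LBR_INFO_BR_CNTR_BITS);
    	}
    	return dst;
    }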
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 01023aa5125b..7927c0b832fa 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1814,6 +1814,14 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
 	.uncore_units_ignore = spr_uncore_units_ignore,
 };

+static const struct intel_uncore_init_fun gnr_uncore_init __initconst = {
+	.cpu_init = gnr_uncore_cpu_init,
+	.pci_init = gnr_uncore_pci_init,
+	.mmio_init = gnr_uncore_mmio_init,
+	.use_discovery = true,
+	.uncore_units_ignore = gnr_uncore_units_ignore,
+};
+
 static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
 	.cpu_init = intel_uncore_generic_uncore_cpu_init,
 	.pci_init = intel_uncore_generic_uncore_pci_init,
@@ -1865,8 +1873,12 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L,	&mtl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&spr_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,	&gnr_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D,	&gnr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,	&gnr_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT,	&gnr_uncore_init),
 	{},
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index c30fb5bb1222..4838502d89ae 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -72,9 +72,9 @@ struct intel_uncore_type {
 	unsigned single_fixed:1;
 	unsigned pair_ctr_ctl:1;
 	union {
-		unsigned *msr_offsets;
-		unsigned *pci_offsets;
-		unsigned *mmio_offsets;
+		u64 *msr_offsets;
+		u64 *pci_offsets;
+		u64 *mmio_offsets;
 	};
 	unsigned *box_ids;
 	struct event_constraint unconstrainted;
@@ -593,6 +593,7 @@ extern struct list_head pci2phy_map_head;
 extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 extern int spr_uncore_units_ignore[];
+extern int gnr_uncore_units_ignore[];

 /* uncore_snb.c */
 int snb_uncore_pci_init(void);
@@ -634,6 +635,9 @@ void icx_uncore_mmio_init(void);
 int spr_uncore_pci_init(void);
 void spr_uncore_cpu_init(void);
 void spr_uncore_mmio_init(void);
+int gnr_uncore_pci_init(void);
+void gnr_uncore_cpu_init(void);
+void gnr_uncore_mmio_init(void);

 /* uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index cb488e41807c..9a698a92962a 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -125,7 +125,8 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
 		       int die, bool parsed)
 {
 	struct intel_uncore_discovery_type *type;
-	unsigned int *box_offset, *ids;
+	unsigned int *ids;
+	u64 *box_offset;
 	int i;

 	if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
@@ -153,7 +154,7 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
 	if (!type)
 		return;

-	box_offset = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL);
+	box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL);
 	if (!box_offset)
 		return;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6ee80ad3423e..22e769a81103 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -125,7 +125,7 @@ struct intel_uncore_discovery_type {
 	u8 ctr_offset;		/* Counter 0 offset */
 	u16 num_boxes;		/* number of boxes for the uncore block */
 	unsigned int *ids;	/* Box IDs */
-	unsigned int *box_offset; /* Box offset */
+	u64 *box_offset;	/* Box offset */
 };

 bool intel_uncore_has_discovery_tables(int *ignore);
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 173e2674be6e..56eea2c66cfb 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -306,7 +306,7 @@ static const struct attribute_group nhmex_uncore_cbox_format_group = {
 };

 /* msr offset for each instance of cbox */
-static unsigned nhmex_cbox_msr_offsets[] = {
+static u64 nhmex_cbox_msr_offsets[] = {
 	0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
 };
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 8250f0f59c2b..a96496bef678 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1396,6 +1396,29 @@ err:
 	return ret;
 }

+static int topology_gidnid_map(int nodeid, u32 gidnid)
+{
+	int i, die_id = -1;
+
+	/*
+	 * every three bits in the Node ID mapping register maps
+	 * to a particular node.
+	 */
+	for (i = 0; i < 8; i++) {
+		if (nodeid == GIDNIDMAP(gidnid, i)) {
+			if (topology_max_die_per_package() > 1)
+				die_id = i;
+			else
+				die_id = topology_phys_to_logical_pkg(i);
+			if (die_id < 0)
+				die_id = -ENODEV;
+			break;
+		}
+	}
+
+	return die_id;
+}
+
 /*
  * build pci bus to socket mapping
  */
@@ -1435,22 +1458,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
 				break;
 			}
-			/*
-			 * every three bits in the Node ID mapping register maps
-			 * to a particular node.
-			 */
-			for (i = 0; i < 8; i++) {
-				if (nodeid == GIDNIDMAP(config, i)) {
-					if (topology_max_die_per_package() > 1)
-						die_id = i;
-					else
-						die_id = topology_phys_to_logical_pkg(i);
-					if (die_id < 0)
-						die_id = -ENODEV;
-					map->pbus_to_dieid[bus] = die_id;
-					break;
-				}
-			}
+			map->pbus_to_dieid[bus] = topology_gidnid_map(nodeid, config);
 			raw_spin_unlock(&pci2phy_map_lock);
 		} else {
 			segment = pci_domain_nr(ubox_dev->bus);
@@ -5278,7 +5286,7 @@ void snr_uncore_mmio_init(void)

 /* ICX uncore support */

-static unsigned icx_cha_msr_offsets[] = {
+static u64 icx_cha_msr_offsets[] = {
 	0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310,
 	0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e,
 	0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a,
@@ -5326,7 +5334,7 @@ static struct intel_uncore_type icx_uncore_chabox = {
 	.format_group		= &snr_uncore_chabox_format_group,
 };

-static unsigned icx_msr_offsets[] = {
+static u64 icx_msr_offsets[] = {
 	0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0,
 };

@@ -5596,7 +5604,7 @@ static int discover_upi_topology(struct intel_uncore_type *type, int ubox_did, int dev_link0)
 	struct pci_dev *ubox = NULL;
 	struct pci_dev *dev = NULL;
 	u32 nid, gid;
-	int i, idx, ret = -EPERM;
+	int idx, lgc_pkg, ret = -EPERM;
 	struct intel_uncore_topology *upi;
 	unsigned int devfn;

@@ -5611,20 +5619,21 @@ static int discover_upi_topology(struct intel_uncore_type *type, int ubox_did, int dev_link0)
 			break;
 		}

-		for (i = 0; i < 8; i++) {
-			if (nid != GIDNIDMAP(gid, i))
-				continue;
-			for (idx = 0; idx < type->num_boxes; idx++) {
-				upi = &type->topology[nid][idx];
-				devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION);
-				dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus),
-								  ubox->bus->number,
-								  devfn);
-				if (dev) {
-					ret = upi_fill_topology(dev, upi, idx);
-					if (ret)
-						goto err;
-				}
+		lgc_pkg = topology_gidnid_map(nid, gid);
+		if (lgc_pkg < 0) {
+			ret = -EPERM;
+			goto err;
+		}
+		for (idx = 0; idx < type->num_boxes; idx++) {
+			upi = &type->topology[lgc_pkg][idx];
+			devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION);
+			dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus),
+							  ubox->bus->number,
+							  devfn);
+			if (dev) {
+				ret = upi_fill_topology(dev, upi, idx);
+				if (ret)
+					goto err;
 			}
 		}
 	}
@@ -6079,13 +6088,16 @@ static struct uncore_event_desc spr_uncore_imc_events[] = {
 	{ /* end: all zeroes */ },
 };

+#define SPR_UNCORE_MMIO_COMMON_FORMAT()				\
+	SPR_UNCORE_COMMON_FORMAT(),				\
+	.ops = &spr_uncore_mmio_ops
+
 static struct intel_uncore_type spr_uncore_imc = {
-	SPR_UNCORE_COMMON_FORMAT(),
+	SPR_UNCORE_MMIO_COMMON_FORMAT(),
 	.name			= "imc",
 	.fixed_ctr_bits		= 48,
 	.fixed_ctr		= SNR_IMC_MMIO_PMON_FIXED_CTR,
 	.fixed_ctl		= SNR_IMC_MMIO_PMON_FIXED_CTL,
-	.ops			= &spr_uncore_mmio_ops,
 	.event_descs		= spr_uncore_imc_events,
 };

@@ -6181,7 +6193,7 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
  */
 #define SPR_UNCORE_UPI_NUM_BOXES	4

-static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
+static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
 	0, 0x8000, 0x10000, 0x18000
 };

@@ -6412,7 +6424,8 @@ static void uncore_type_customized_copy(struct intel_uncore_type *to_type,

 static struct intel_uncore_type **
 uncore_get_uncores(enum uncore_access_type type_id, int num_extra,
-		   struct intel_uncore_type **extra)
+		   struct intel_uncore_type **extra, int max_num_types,
+		   struct intel_uncore_type **uncores)
 {
 	struct intel_uncore_type **types, **start_types;
 	int i;
@@ -6421,9 +6434,9 @@ uncore_get_uncores(enum uncore_access_type type_id, int num_extra,

 	/* Only copy the customized features */
 	for (; *types; types++) {
-		if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES)
+		if ((*types)->type_id >= max_num_types)
 			continue;
-		uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]);
+		uncore_type_customized_copy(*types, uncores[(*types)->type_id]);
 	}

 	for (i = 0; i < num_extra; i++, types++)
@@ -6470,7 +6483,9 @@ void spr_uncore_cpu_init(void)

 	uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
 						UNCORE_SPR_MSR_EXTRA_UNCORES,
-						spr_msr_uncores);
+						spr_msr_uncores,
+						UNCORE_SPR_NUM_UNCORE_TYPES,
+						spr_uncores);

 	type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA);
 	if (type) {
@@ -6552,7 +6567,9 @@ int spr_uncore_pci_init(void)
 		spr_update_device_location(UNCORE_SPR_M3UPI);
 	uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI,
 						UNCORE_SPR_PCI_EXTRA_UNCORES,
-						spr_pci_uncores);
+						spr_pci_uncores,
+						UNCORE_SPR_NUM_UNCORE_TYPES,
+						spr_uncores);
 	return 0;
 }

@@ -6560,15 +6577,116 @@ void spr_uncore_mmio_init(void)
 {
 	int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true);

-	if (ret)
-		uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL);
-	else {
+	if (ret) {
+		uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL,
+							 UNCORE_SPR_NUM_UNCORE_TYPES,
+							 spr_uncores);
+	} else {
 		uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO,
 							 UNCORE_SPR_MMIO_EXTRA_UNCORES,
-							 spr_mmio_uncores);
+							 spr_mmio_uncores,
+							 UNCORE_SPR_NUM_UNCORE_TYPES,
+							 spr_uncores);

 		spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2;
 	}
 }

 /* end of SPR uncore support */
+
+/* GNR uncore support */
+
+#define UNCORE_GNR_NUM_UNCORE_TYPES	23
+#define UNCORE_GNR_TYPE_15		15
+#define UNCORE_GNR_B2UPI		18
+#define UNCORE_GNR_TYPE_21		21
+#define UNCORE_GNR_TYPE_22		22
+
+int gnr_uncore_units_ignore[] = {
+	UNCORE_SPR_UPI,
+	UNCORE_GNR_TYPE_15,
+	UNCORE_GNR_B2UPI,
+	UNCORE_GNR_TYPE_21,
+	UNCORE_GNR_TYPE_22,
+	UNCORE_IGNORE_END
+};
+
+static struct intel_uncore_type gnr_uncore_ubox = {
+	.name			= "ubox",
+	.attr_update		= uncore_alias_groups,
+};
+
+static struct intel_uncore_type gnr_uncore_b2cmi = {
+	SPR_UNCORE_PCI_COMMON_FORMAT(),
+	.name			= "b2cmi",
+};
+
+static struct intel_uncore_type gnr_uncore_b2cxl = {
+	SPR_UNCORE_MMIO_COMMON_FORMAT(),
+	.name			= "b2cxl",
+};
+
+static struct intel_uncore_type gnr_uncore_mdf_sbo = {
+	.name			= "mdf_sbo",
+	.attr_update		= uncore_alias_groups,
+};
+
+static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
+	&spr_uncore_chabox,
+	&spr_uncore_iio,
+	&spr_uncore_irp,
+	NULL,
+	&spr_uncore_pcu,
+	&gnr_uncore_ubox,
+	&spr_uncore_imc,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
+	&gnr_uncore_b2cmi,
+	&gnr_uncore_b2cxl,
+	NULL,
+	NULL,
+	&gnr_uncore_mdf_sbo,
+	NULL,
+	NULL,
+};
+
+static struct freerunning_counters gnr_iio_freerunning[] = {
+	[SPR_IIO_MSR_IOCLK]	= { 0x290e, 0x01, 0x10, 1, 48 },
+	[SPR_IIO_MSR_BW_IN]	= { 0x360e, 0x10, 0x80, 8, 48 },
+	[SPR_IIO_MSR_BW_OUT]	= { 0x2e0e, 0x10, 0x80, 8, 48 },
+};
+
+void gnr_uncore_cpu_init(void)
+{
+	uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
+						UNCORE_SPR_MSR_EXTRA_UNCORES,
+						spr_msr_uncores,
+						UNCORE_GNR_NUM_UNCORE_TYPES,
+						gnr_uncores);
+	spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
+	spr_uncore_iio_free_running.freerunning = gnr_iio_freerunning;
+}
+
+int gnr_uncore_pci_init(void)
+{
+	uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL,
+						UNCORE_GNR_NUM_UNCORE_TYPES,
+						gnr_uncores);
+	return 0;
+}
+
+void gnr_uncore_mmio_init(void)
+{
+	uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL,
+						 UNCORE_GNR_NUM_UNCORE_TYPES,
+						 gnr_uncores);
+}
+
+/* end of GNR uncore support */
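topology_gidnid_map(), factored out earlier in this file's diff, scans the UBOX node-id mapping register. Assuming the preexisting GIDNIDMAP() macro packs one 3-bit node id per slot (the macro itself is not part of this diff, so its definition below is an assumption), the scan reduces to:

    #include <stdint.h>

    /* assumed definition; the real macro lives elsewhere in uncore_snbep.c */
    #define GIDNIDMAP(config, id)	(((config) >> (3 * (id))) & 0x7)

    /*
     * Walk the eight 3-bit slots of the mapping register and return the
     * slot index whose node id matches - the same scan the kernel's
     * topology_gidnid_map() performs before translating to a die id.
     */
    static int gidnid_slot(int nodeid, uint32_t gidnid)
    {
    	for (int i = 0; i < 8; i++) {
    		if (GIDNIDMAP(gidnid, i) == (uint32_t)nodeid)
    			return i;
    	}
    	return -1;
    }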
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53dd5d495ba6..fb56518356ec 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
 	return is_metric_event(event) || is_slots_event(event);
 }

+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+	return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
 	int refcnt; /* reference count */
@@ -283,6 +288,7 @@ struct cpu_hw_events {
 	int				lbr_pebs_users;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	u64				lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
 	union {
 		struct er_account		*lbr_sel;
 		struct er_account		*lbr_ctl;
@@ -888,6 +894,7 @@ struct x86_pmu {
 	unsigned int	lbr_mispred:1;
 	unsigned int	lbr_timed_lbr:1;
 	unsigned int	lbr_br_type:1;
+	unsigned int	lbr_counters:4;

 	void		(*lbr_reset)(void);
 	void		(*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1012,6 +1019,7 @@ do {									\
 #define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
 #define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR		0x400 /* Support branch counter logging */

 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1552,6 +1560,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);

 void intel_ds_init(void);

+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event);
+
 void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
 				 struct perf_event_pmu_context *next_epc);
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9b4426..6c977c19f2cd 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,5 @@ PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
 PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
 PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
 PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS,	0x80000) /* logs the counters in the extra space of each branch */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1d51e1850ed0..737a52b89e64 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -237,6 +237,11 @@
 #define LBR_INFO_CYCLES			0xffff
 #define LBR_INFO_BR_TYPE_OFFSET		56
 #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET		32
+#define LBR_INFO_BR_CNTR_NUM		4
+#define LBR_INFO_BR_CNTR_BITS		2
+#define LBR_INFO_BR_CNTR_MASK		GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK	GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)

 #define MSR_ARCH_LBR_CTL		0x000014ce
 #define ARCH_LBR_CTL_LBREN		BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2618ec7c3d1d..3736b8a46c04 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -31,6 +31,7 @@
 #define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR			(1ULL << 35)

 #define INTEL_FIXED_BITS_MASK				0xFULL
 #define INTEL_FIXED_BITS_STRIDE				4
@@ -223,6 +224,9 @@ union cpuid28_ecx {
 		unsigned int	lbr_timed_lbr:1;
 		/* Branch Type Field Supported */
 		unsigned int	lbr_br_type:1;
+		unsigned int	reserved:13;
+		/* Branch counters (Event Logging) Supported */
+		unsigned int	lbr_counters:4;
 	} split;
 	unsigned int		full;
 };
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2cc2aa120b4b..3f57ce68a3f1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -757,6 +757,7 @@ const struct cpumask *cpu_clustergroup_mask(int cpu)
 {
 	return cpu_l2c_shared_mask(cpu);
 }
+EXPORT_SYMBOL_GPL(cpu_clustergroup_mask);

 static void impress_friends(void)
 {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5547ba68e6e4..d2a15c0c6f8a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1143,6 +1143,15 @@ static inline bool branch_sample_priv(const struct perf_event *event)
 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
 }

+static inline bool branch_sample_counters(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
+}
+
+static inline bool branch_sample_call_stack(const struct perf_event *event)
+{
+	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}

 struct perf_sample_data {
 	/*
@@ -1177,6 +1186,7 @@ struct perf_sample_data {
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
+	u64				*br_stack_cntr;
 	union perf_sample_weight	weight;
 	union  perf_mem_data_src	data_src;
 	u64				txn;
@@ -1254,7 +1264,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,

 static inline void perf_sample_save_brstack(struct perf_sample_data *data,
 					    struct perf_event *event,
-					    struct perf_branch_stack *brs)
+					    struct perf_branch_stack *brs,
+					    u64 *brs_cntr)
 {
 	int size = sizeof(u64); /* nr */

@@ -1262,7 +1273,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
 		size += sizeof(u64);
 	size += brs->nr * sizeof(struct perf_branch_entry);

+	/*
+	 * The extension space for counters is appended after the
+	 * struct perf_branch_stack. It is used to store the occurrences
+	 * of events of each branch.
+	 */
+	if (brs_cntr)
+		size += brs->nr * sizeof(u64);
+
 	data->br_stack = brs;
+	data->br_stack_cntr = brs_cntr;
 	data->dyn_size += size;
 	data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 }
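The new cpuid28_ecx field above sits at bits 19:16 of leaf 0x1C's ECX, given the preceding bitfield widths; that bit position is an inference from the layout, not stated in this diff. A user-space probe under that assumption:

    #include <cpuid.h>

    /*
     * Query the Arch LBR leaf (0x1c) and return the branch counter
     * bitmap, mirroring cpuid28_ecx.split.lbr_counters. Returns 0 when
     * the feature (or the leaf) is absent.
     */
    static int lbr_counters_from_cpuid(void)
    {
    	unsigned int eax, ebx, ecx, edx;

    	if (!__get_cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx))
    		return 0;
    	return (ecx >> 16) & 0xf;	/* assumed bits 19:16 */
    }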
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 39c6a250dd1b..3a64499b0f5d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {

 	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */

+	PERF_SAMPLE_BRANCH_COUNTERS_SHIFT	= 19, /* save occurrences of events on a branch */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };

@@ -235,6 +237,8 @@ enum perf_branch_sample_type {

 	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,

+	PERF_SAMPLE_BRANCH_COUNTERS	= 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };

@@ -982,6 +986,12 @@ enum perf_event_type {
 	 *	{ u64			nr;
 	 *	  { u64	hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
 	 *        { u64 from, to, flags } lbr[nr];
	 *        #
	 *        # The format of the counters is decided by the
	 *        # "branch_counter_nr" and "branch_counter_width",
	 *        # which are defined in the ABI.
	 *        #
+	 *        { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
 	 *	} && PERF_SAMPLE_BRANCH_STACK
 	 *
 	 *	{ u64			abi; # enum perf_sample_regs_abi
@@ -1427,6 +1437,9 @@ struct perf_branch_entry {
 		reserved:31;
 };

+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX		33
+
 union perf_sample_weight {
 	__u64		full;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
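Putting the ABI pieces together, a consumer of PERF_RECORD_SAMPLE reads cntr[] immediately after lbr[]. A parsing sketch that assumes PERF_SAMPLE_BRANCH_HW_INDEX and PERF_SAMPLE_BRANCH_COUNTERS were both requested, with error handling elided:

    #include <stdint.h>

    struct branch_entry {		/* matches { u64 from, to, flags } */
    	uint64_t from, to, flags;
    };

    /*
     * Decode the PERF_SAMPLE_BRANCH_STACK portion of a sample body.
     * 'p' points at the u64 nr field; returns a pointer past the branch
     * stack, i.e. at the next sample field.
     */
    static const uint64_t *decode_brstack(const uint64_t *p,
    				      const struct branch_entry **lbr,
    				      const uint64_t **cntr, uint64_t *nr)
    {
    	*nr = *p++;
    	p++;					/* skip hw_idx */
    	*lbr = (const struct branch_entry *)p;
    	p += *nr * 3;				/* from, to, flags per entry */
    	*cntr = p;				/* one u64 of counters per entry */
    	p += *nr;
    	return p;
    }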
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9efd0d7775e7..f0f0f71213a1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7397,6 +7397,14 @@ void perf_output_sample(struct perf_output_handle *handle,
 			if (branch_sample_hw_index(event))
 				perf_output_put(handle, data->br_stack->hw_idx);
 			perf_output_copy(handle, data->br_stack->entries, size);
+			/*
+			 * Add the extension space which is appended
+			 * right after the struct perf_branch_stack.
+			 */
+			if (data->br_stack_cntr) {
+				size = data->br_stack->nr * sizeof(u64);
+				perf_output_copy(handle, data->br_stack_cntr, size);
+			}
 		} else {
 			/*
 			 * we always store at least the value of nr
@@ -11425,9 +11433,30 @@ static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
 static struct attribute *pmu_dev_attrs[] = {
 	&dev_attr_type.attr,
 	&dev_attr_perf_event_mux_interval_ms.attr,
+	&dev_attr_nr_addr_filters.attr,
+	NULL,
+};
+
+static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	if (n == 2 && !pmu->nr_addr_filters)
+		return 0;
+
+	return a->mode;
+}
+
+static struct attribute_group pmu_dev_attr_group = {
+	.is_visible = pmu_dev_is_visible,
+	.attrs = pmu_dev_attrs,
+};
+
+static const struct attribute_group *pmu_dev_groups[] = {
+	&pmu_dev_attr_group,
 	NULL,
 };
-ATTRIBUTE_GROUPS(pmu_dev);

 static int pmu_bus_running;
 static struct bus_type pmu_bus = {
@@ -11464,18 +11493,11 @@ static int pmu_dev_alloc(struct pmu *pmu)
 	if (ret)
 		goto free_dev;

-	/* For PMUs with address filters, throw in an extra attribute: */
-	if (pmu->nr_addr_filters)
-		ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
-
-	if (ret)
-		goto del_dev;
-
-	if (pmu->attr_update)
+	if (pmu->attr_update) {
 		ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
-
-	if (ret)
-		goto del_dev;
+		if (ret)
+			goto del_dev;
+	}

 out:
 	return ret;