diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-16 17:13:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-16 17:13:31 -0700 |
commit | 576a997c6315ee482519e7cc080f341b07638808 (patch) | |
tree | 625cff628b8db0638ed496f560d38eda0375cacb /arch/x86/events/amd/uncore.c | |
parent | 4a996d90b9e046c6d59845acf00a54d464c34ff3 (diff) | |
parent | fa0c1c9d283b37fdb7fc1dcccbb88fc8f48a4aa4 (diff) |
Merge tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:
- Intel PT support enhancements & fixes
- Fix leaked SIGTRAP events
- Improve and fix the Intel uncore driver
- Add support for Intel HBM and CXL uncore counters
- Add Intel Lake and Arrow Lake support
- AMD uncore driver fixes
- Make SIGTRAP and __perf_pending_irq() work on RT
- Micro-optimizations
- Misc cleanups and fixes
* tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
perf/x86/intel: Add a distinct name for Granite Rapids
perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake
perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated
perf/x86/intel/uncore: Fix the bits of the CHA extended umask for SPR
perf: Split __perf_pending_irq() out of perf_pending_irq()
perf: Don't disable preemption in perf_pending_task().
perf: Move swevent_htable::recursion into task_struct.
perf: Shrink the size of the recursion counter.
perf: Enqueue SIGTRAP always via task_work.
task_work: Add TWA_NMI_CURRENT as an additional notify mode.
perf: Move irq_work_queue() where the event is prepared.
perf: Fix event leak upon exec and file release
perf: Fix event leak upon exit
task_work: Introduce task_work_cancel() again
task_work: s/task_work_cancel()/task_work_cancel_func()/
perf/x86/amd/uncore: Fix DF and UMC domain identification
perf/x86/amd/uncore: Avoid PMU registration if counters are unavailable
perf/x86/intel: Support Perfmon MSRs aliasing
perf/x86/intel: Support PERFEVTSEL extension
perf/x86: Add config_mask to represent EVENTSEL bitmask
...
Diffstat (limited to 'arch/x86/events/amd/uncore.c')
-rw-r--r-- | arch/x86/events/amd/uncore.c | 36 |
1 files changed, 23 insertions, 13 deletions
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 4ccb8fa483e6..0bfde2ea5cb8 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags) /* if not, take the first available counter */ hwc->idx = -1; for (i = 0; i < pmu->num_counters; i++) { - if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { + struct perf_event *tmp = NULL; + + if (try_cmpxchg(&ctx->events[i], &tmp, event)) { hwc->idx = i; break; } @@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags) event->pmu->stop(event, PERF_EF_UPDATE); for (i = 0; i < pmu->num_counters; i++) { - if (cmpxchg(&ctx->events[i], event, NULL) == event) + struct perf_event *tmp = event; + + if (try_cmpxchg(&ctx->events[i], &tmp, NULL)) break; } @@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_NB; info.split.gid = 0; - info.split.cid = topology_die_id(cpu); + info.split.cid = topology_logical_package_id(cpu); if (pmu_version >= 2) { ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); @@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct attribute **df_attr = amd_uncore_df_format_attr; struct amd_uncore_pmu *pmu; + int num_counters; /* Run just once */ if (uncore->init_done) return amd_uncore_ctx_init(uncore, cpu); + num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + if (!num_counters) + goto done; + /* No grouping, single instance for a system */ uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); - if (!uncore->pmus) { - uncore->num_pmus = 0; + if (!uncore->pmus) goto done; - } /* * For Family 17h and above, the Northbridge counters are repurposed @@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", sizeof(pmu->name)); - pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + pmu->num_counters = num_counters; pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; pmu->group = amd_uncore_ctx_gid(uncore, cpu); @@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct attribute **l3_attr = amd_uncore_l3_format_attr; struct amd_uncore_pmu *pmu; + int num_counters; /* Run just once */ if (uncore->init_done) return amd_uncore_ctx_init(uncore, cpu); + num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + if (!num_counters) + goto done; + /* No grouping, single instance for a system */ uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); - if (!uncore->pmus) { - uncore->num_pmus = 0; + if (!uncore->pmus) goto done; - } /* * For Family 17h and above, L3 cache counters are available instead @@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", sizeof(pmu->name)); - pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + pmu->num_counters = num_counters; pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; pmu->group = amd_uncore_ctx_gid(uncore, cpu); @@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx); info.split.aux_data = ecx; /* stash active mask */ info.split.num_pmcs = ebx.split.num_umc_pmc; - info.split.gid = topology_die_id(cpu); - info.split.cid = topology_die_id(cpu); + info.split.gid = topology_logical_package_id(cpu); + info.split.cid = topology_logical_package_id(cpu); *per_cpu_ptr(uncore->info, cpu) = info; } |