diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 17:29:55 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 17:29:55 -0800 |
commit | a2f0e7eee1344eb9f91b22bc72d9eb0a52b849c9 (patch) | |
tree | cba6f8b16f26c986e66b291416103c859697b6b0 /arch | |
parent | 6e649d08568220ee88deef0a1ad8b3a935420cf2 (diff) | |
parent | c828441f21ddc819a28b5723a72e3c840e9de1c6 (diff) |
Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
- Optimize perf_sample_data layout
- Prepare sample data handling for BPF integration
- Update the x86 PMU driver for Intel Meteor Lake
- Restructure the x86 uncore code to fix a SPR (Sapphire Rapids)
discovery breakage
- Fix the x86 Zhaoxin PMU driver
- Cleanups
* tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
perf/x86/intel/uncore: Add Meteor Lake support
x86/perf/zhaoxin: Add stepping check for ZXC
perf/x86/intel/ds: Fix the conversion from TSC to perf time
perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
perf/x86/uncore: Add a quirk for UPI on SPR
perf/x86/uncore: Ignore broken units in discovery table
perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
perf/x86/uncore: Factor out uncore_device_to_die()
perf/core: Call perf_prepare_sample() before running BPF
perf/core: Introduce perf_prepare_header()
perf/core: Do not pass header for sample ID init
perf/core: Set data->sample_flags in perf_prepare_sample()
perf/core: Add perf_sample_save_brstack() helper
perf/core: Add perf_sample_save_raw_data() helper
perf/core: Add perf_sample_save_callchain() helper
perf/core: Save the dynamic parts of sample data size
x86/kprobes: Use switch-case for 0xFF opcodes in prepare_emulation
perf/core: Change the layout of perf_sample_data
perf/x86/msr: Add Meteor Lake support
perf/x86/cstate: Add Meteor Lake support
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/perf/core-book3s.c | 3 | ||||
-rw-r--r-- | arch/s390/kernel/perf_cpum_cf.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/perf_cpum_sf.c | 3 | ||||
-rw-r--r-- | arch/s390/kernel/perf_pai_crypto.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/perf_pai_ext.c | 4 | ||||
-rw-r--r-- | arch/x86/events/amd/core.c | 6 | ||||
-rw-r--r-- | arch/x86/events/amd/ibs.c | 9 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 199 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 133 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 4 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 41 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 5 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_discovery.c | 60 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_discovery.h | 14 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 161 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 158 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 23 | ||||
-rw-r--r-- | arch/x86/events/zhaoxin/core.c | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 34 |
22 files changed, 716 insertions, 169 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index bf318dd9b709..8c1f7def596e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, struct cpu_hw_events *cpuhw; cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(event, cpuhw); - data.br_stack = &cpuhw->bhrb_stack; - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack); } if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index f043a7ff220b..aa38649c7c27 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -662,9 +662,7 @@ static int cfdiag_push_sample(struct perf_event *event, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = cpuhw->usedss; raw.frag.data = cpuhw->stop; - raw.size = raw.frag.size; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ce886a03545a..ead6eea48be8 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,8 @@ static void cpumsf_output_event_pid(struct perf_event *event, /* Protect callchain buffers, tasks */ rcu_read_lock(); - perf_prepare_sample(&header, data, event, regs); + perf_prepare_sample(data, event, regs); + perf_prepare_header(&header, data, event, regs); if (perf_output_begin(&handle, data, event, header.size)) goto out; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 985e243a2ed8..a7b339c4fd7c 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -362,9 +362,7 @@ static int paicrypt_push_sample(void) if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - raw.size = raw.frag.size; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 1138f57baae3..555597222bad 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -451,9 +451,7 @@ static int paiext_push_sample(void) if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - raw.size = raw.frag.size; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 4386b10682ce..8c45b198b62f 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (has_branch_stack(event)) { - data.br_stack = &cpuc->lbr_stack; - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; - } + if (has_branch_stack(event)) + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index da3f5ebac4e1..64582954b5f6 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1110,8 +1110,7 @@ fail: .data = ibs_data.data, }, }; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } if (perf_ibs == &perf_ibs_op) @@ -1122,10 +1121,8 @@ fail: * recorded as part of interrupt regs. Thus we need to use rip from * interrupt regs while unwinding call stack. */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { - data.callchain = perf_callchain(event, iregs); - data.sample_flags |= PERF_SAMPLE_CALLCHAIN; - } + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + perf_sample_save_callchain(&data, event, iregs); throttle = perf_event_overflow(event, &data, ®s); out: diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index bafdc2be479a..9fce2d1247a7 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0), + INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0), + INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1), + EVENT_EXTRA_END +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -3026,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) { - data.br_stack = &cpuc->lbr_stack; - data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; - } + if (has_branch_stack(event)) + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); @@ -4182,6 +4190,12 @@ static int hsw_hw_config(struct perf_event *event) static struct event_constraint counter0_constraint = INTEL_ALL_EVENT_CONSTRAINT(0, 0x1); +static struct event_constraint counter1_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x2); + +static struct event_constraint counter0_1_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x3); + static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); @@ -4191,6 +4205,12 @@ static struct event_constraint fixed0_constraint = static struct event_constraint fixed0_counter0_constraint = INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL); +static struct event_constraint fixed0_counter0_1_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL); + +static struct event_constraint counters_1_7_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -4322,6 +4342,78 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return &emptyconstraint; } +static struct event_constraint * +cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_get_event_constraints(cpuc, idx, event); + + /* + * The :ppp indicates the Precise Distribution (PDist) facility, which + * is only supported on the GP counter 0 & 1 and Fixed counter 0. + * If a :ppp event which is not available on the above eligible counters, + * error out. + */ + if (event->attr.precise_ip == 3) { + /* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */ + if (constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_counter0_1_constraint; + + switch (c->idxmsk64 & 0x3ull) { + case 0x1: + return &counter0_constraint; + case 0x2: + return &counter1_constraint; + case 0x3: + return &counter0_1_constraint; + } + return &emptyconstraint; + } + + return c; +} + +static struct event_constraint * +rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + c = spr_get_event_constraints(cpuc, idx, event); + + /* The Retire Latency is not supported by the fixed counter 0. */ + if (event->attr.precise_ip && + (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) && + constraint_match(&fixed0_constraint, event->hw.config)) { + /* + * The Instruction PDIR is only available + * on the fixed counter 0. Error out for this case. + */ + if (event->attr.precise_ip == 3) + return &emptyconstraint; + return &counters_1_7_constraint; + } + + return c; +} + +static struct event_constraint * +mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->cpu_type == hybrid_big) + return rwc_get_event_constraints(cpuc, idx, event); + if (pmu->cpu_type == hybrid_small) + return cmt_get_event_constraints(cpuc, idx, event); + + WARN_ON(1); + return &emptyconstraint; +} + static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); @@ -4494,6 +4586,25 @@ static void flip_smm_bit(void *data) } } +static void intel_pmu_check_num_counters(int *num_counters, + int *num_counters_fixed, + u64 *intel_ctrl, u64 fixed_mask); + +static void update_pmu_cap(struct x86_hybrid_pmu *pmu) +{ + unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); + unsigned int eax, ebx, ecx, edx; + + if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, + &eax, &ebx, &ecx, &edx); + pmu->num_counters = fls(eax); + pmu->num_counters_fixed = fls(ebx); + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, ebx); + } +} + static bool init_hybrid_pmu(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); @@ -4519,6 +4630,9 @@ static bool init_hybrid_pmu(int cpu) if (!cpumask_empty(&pmu->supported_cpus)) goto end; + if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) + update_pmu_cap(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; @@ -5463,6 +5577,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = { NULL, }; +static struct attribute *mtl_hybrid_mem_attrs[] = { + EVENT_PTR(mem_ld_adl), + EVENT_PTR(mem_st_adl), + NULL +}; + EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); @@ -5490,20 +5610,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); FORMAT_ATTR_HYBRID(frontend, hybrid_big); +#define ADL_HYBRID_RTM_FORMAT_ATTR \ + FORMAT_HYBRID_PTR(in_tx), \ + FORMAT_HYBRID_PTR(in_tx_cp) + +#define ADL_HYBRID_FORMAT_ATTR \ + FORMAT_HYBRID_PTR(offcore_rsp), \ + FORMAT_HYBRID_PTR(ldlat), \ + FORMAT_HYBRID_PTR(frontend) + static struct attribute *adl_hybrid_extra_attr_rtm[] = { - FORMAT_HYBRID_PTR(in_tx), - FORMAT_HYBRID_PTR(in_tx_cp), - FORMAT_HYBRID_PTR(offcore_rsp), - FORMAT_HYBRID_PTR(ldlat), - FORMAT_HYBRID_PTR(frontend), - NULL, + ADL_HYBRID_RTM_FORMAT_ATTR, + ADL_HYBRID_FORMAT_ATTR, + NULL }; static struct attribute *adl_hybrid_extra_attr[] = { - FORMAT_HYBRID_PTR(offcore_rsp), - FORMAT_HYBRID_PTR(ldlat), - FORMAT_HYBRID_PTR(frontend), - NULL, + ADL_HYBRID_FORMAT_ATTR, + NULL +}; + +PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63"); +FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); + +static struct attribute *mtl_hybrid_extra_attr_rtm[] = { + ADL_HYBRID_RTM_FORMAT_ATTR, + ADL_HYBRID_FORMAT_ATTR, + FORMAT_HYBRID_PTR(snoop_rsp), + NULL +}; + +static struct attribute *mtl_hybrid_extra_attr[] = { + ADL_HYBRID_FORMAT_ATTR, + FORMAT_HYBRID_PTR(snoop_rsp), + NULL }; static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) @@ -5725,6 +5865,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } +static __always_inline bool is_mtl(u8 x86_model) +{ + return (x86_model == INTEL_FAM6_METEORLAKE) || + (x86_model == INTEL_FAM6_METEORLAKE_L); +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6382,6 +6528,8 @@ __init int intel_pmu_init(void) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * @@ -6401,9 +6549,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_adl(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.num_topdown_events = 8; static_call_update(intel_pmu_update_topdown_event, @@ -6490,8 +6636,22 @@ __init int intel_pmu_init(void) pmu->event_constraints = intel_slm_event_constraints; pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; + if (is_mtl(boot_cpu_data.x86_model)) { + x86_pmu.pebs_latency_data = mtl_latency_data_small; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + mem_attr = mtl_hybrid_mem_attrs; + intel_pmu_pebs_data_source_mtl(); + x86_pmu.get_event_constraints = mtl_get_event_constraints; + pmu->extra_regs = intel_cmt_extra_regs; + pr_cont("Meteorlake Hybrid events, "); + name = "meteorlake_hybrid"; + } else { + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + } break; default: @@ -6606,6 +6766,9 @@ __init int intel_pmu_init(void) if (is_hybrid()) intel_pmu_check_hybrid_pmus((u64)fixed_mask); + if (x86_pmu.intel_cap.pebs_timing_info) + x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; + intel_aux_output_init(); return 0; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 88e58b6ee73c..b0354dc869d2 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2,12 +2,14 @@ #include <linux/bitops.h> #include <linux/types.h> #include <linux/slab.h> +#include <linux/sched/clock.h> #include <asm/cpu_entry_area.h> #include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/insn.h> #include <asm/io.h> +#include <asm/timer.h> #include "../perf_event.h" @@ -53,6 +55,13 @@ union intel_x86_pebs_dse { unsigned int st_lat_locked:1; unsigned int ld_reserved3:26; }; + struct { + unsigned int mtl_dse:5; + unsigned int mtl_locked:1; + unsigned int mtl_stlb_miss:1; + unsigned int mtl_fwd_blk:1; + unsigned int ld_reserved4:24; + }; }; @@ -135,6 +144,29 @@ void __init intel_pmu_pebs_data_source_adl(void) __intel_pmu_pebs_data_source_grt(data_source); } +static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source) +{ + data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); + data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE); + data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD); + data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_mtl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_skl(false, data_source); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + intel_pmu_pebs_data_source_cmt(data_source); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -219,24 +251,19 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) } /* Retrieve the latency data for e-core of ADL */ -u64 adl_latency_data_small(struct perf_event *event, u64 status) +static u64 __adl_latency_data_small(struct perf_event *event, u64 status, + u8 dse, bool tlb, bool lock, bool blk) { - union intel_x86_pebs_dse dse; u64 val; WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); - dse.val = status; - - val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + dse &= PERF_PEBS_DATA_SOURCE_MASK; + val = hybrid_var(event->pmu, pebs_data_source)[dse]; - /* - * For the atom core on ADL, - * bit 4: lock, bit 5: TLB access. - */ - pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss); + pebs_set_tlb_lock(&val, tlb, lock); - if (dse.ld_data_blk) + if (blk) val |= P(BLK, DATA); else val |= P(BLK, NA); @@ -244,6 +271,29 @@ u64 adl_latency_data_small(struct perf_event *event, u64 status) return val; } +u64 adl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + + dse.val = status; + + return __adl_latency_data_small(event, status, dse.ld_dse, + dse.ld_locked, dse.ld_stlb_miss, + dse.ld_data_blk); +} + +/* Retrieve the latency data for e-core of MTL */ +u64 mtl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + + dse.val = status; + + return __adl_latency_data_small(event, status, dse.mtl_dse, + dse.mtl_stlb_miss, dse.mtl_locked, + dse.mtl_fwd_blk); +} + static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; @@ -759,7 +809,8 @@ int intel_pmu_drain_bts_buffer(void) * the sample. */ rcu_read_lock(); - perf_prepare_sample(&header, &data, event, ®s); + perf_prepare_sample(&data, event, ®s); + perf_prepare_header(&header, &data, event, ®s); if (perf_output_begin(&handle, &data, event, header.size * (top - base - skip))) @@ -1519,6 +1570,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux) return val; } +static void setup_pebs_time(struct perf_event *event, + struct perf_sample_data *data, + u64 tsc) +{ + /* Converting to a user-defined clock is not supported yet. */ + if (event->attr.use_clockid != 0) + return; + + /* + * Doesn't support the conversion when the TSC is unstable. + * The TSC unstable case is a corner case and very unlikely to + * happen. If it happens, the TSC in a PEBS record will be + * dropped and fall back to perf_event_clock(). + */ + if (!using_native_sched_clock() || !sched_clock_stable()) + return; + + data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset; + data->sample_flags |= PERF_SAMPLE_TIME; +} + #define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \ PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_DATA_PAGE_SIZE) @@ -1569,10 +1641,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - data->callchain = perf_callchain(event, iregs); - data->sample_flags |= PERF_SAMPLE_CALLCHAIN; - } + if (sample_type & PERF_SAMPLE_CALLCHAIN) + perf_sample_save_callchain(data, event, iregs); /* * We use the interrupt regs as a base because the PEBS record does not @@ -1668,16 +1738,11 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * * We can only do this for the default trace clock. */ - if (x86_pmu.intel_cap.pebs_format >= 3 && - event->attr.use_clockid == 0) { - data->time = native_sched_clock_from_tsc(pebs->tsc); - data->sample_flags |= PERF_SAMPLE_TIME; - } + if (x86_pmu.intel_cap.pebs_format >= 3) + setup_pebs_time(event, data, pebs->tsc); - if (has_branch_stack(event)) { - data->br_stack = &cpuc->lbr_stack; - data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; - } + if (has_branch_stack(event)) + perf_sample_save_brstack(data, event, &cpuc->lbr_stack); } static void adaptive_pebs_save_regs(struct pt_regs *regs, @@ -1705,6 +1770,7 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #define PEBS_LATENCY_MASK 0xffff #define PEBS_CACHE_LATENCY_OFFSET 32 +#define PEBS_RETIRE_LATENCY_OFFSET 32 /* * With adaptive PEBS the layout depends on what fields are configured. @@ -1735,10 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); data->period = event->hw.last_period; - if (event->attr.use_clockid == 0) { - data->time = native_sched_clock_from_tsc(basic->tsc); - data->sample_flags |= PERF_SAMPLE_TIME; - } + setup_pebs_time(event, data, basic->tsc); /* * We must however always use iregs for the unwinder to stay sane; the @@ -1746,16 +1809,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - data->callchain = perf_callchain(event, iregs); - data->sample_flags |= PERF_SAMPLE_CALLCHAIN; - } + if (sample_type & PERF_SAMPLE_CALLCHAIN) + perf_sample_save_callchain(data, event, iregs); *regs = *iregs; /* The ip in basic is EventingIP */ set_linear_ip(regs, basic->ip); regs->flags = PERF_EFLAGS_EXACT; + if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)) + data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; + /* * The record for MEMINFO is in front of GP * But PERF_SAMPLE_TRANSACTION needs gprs->ax. @@ -1835,8 +1899,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, if (has_branch_stack(event)) { intel_pmu_store_pebs_lbrs(lbr); - data->br_stack = &cpuc->lbr_stack; - data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + perf_sample_save_brstack(data, event, &cpuc->lbr_stack); } } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 1f21f576ca77..c3b0d15a9841 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1606,12 +1606,10 @@ clear_arch_lbr: */ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) { - int lbr_fmt = x86_pmu.intel_cap.lbr_format; - lbr->nr = x86_pmu.lbr_nr; lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; - lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0; + lbr->info = x86_pmu.lbr_info; } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 459b1aafd4d4..bc226603ef3e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -65,6 +65,21 @@ int uncore_die_to_segment(int die) return bus ? pci_domain_nr(bus) : -EINVAL; } +int uncore_device_to_die(struct pci_dev *dev) +{ + int node = pcibus_to_node(dev->bus); + int cpu; + + for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) + return c->logical_die_id; + } + + return -1; +} + static void uncore_free_pcibus_map(void) { struct pci2phy_map *map, *tmp; @@ -842,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; +static inline int uncore_get_box_id(struct intel_uncore_type *type, + struct intel_uncore_pmu *pmu) +{ + return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; +} + void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) { struct intel_uncore_type *type = pmu->type; @@ -850,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) sprintf(pmu_name, "uncore_type_%u", type->type_id); else { sprintf(pmu_name, "uncore_type_%u_%d", - type->type_id, type->box_ids[pmu->pmu_idx]); + type->type_id, uncore_get_box_id(type, pmu)); } } @@ -877,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu) * Use the box ID from the discovery table if applicable. */ sprintf(pmu->name, "uncore_%s_%d", type->name, - type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx); + uncore_get_box_id(type, pmu)); } } @@ -1674,7 +1695,10 @@ struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); void (*mmio_init)(void); + /* Discovery table is required */ bool use_discovery; + /* The units in the discovery table should be ignored. */ + int *uncore_units_ignore; }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1765,6 +1789,11 @@ static const struct intel_uncore_init_fun adl_uncore_init __initconst = { .mmio_init = adl_uncore_mmio_init, }; +static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { + .cpu_init = mtl_uncore_cpu_init, + .mmio_init = adl_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun icx_uncore_init __initconst = { .cpu_init = icx_uncore_cpu_init, .pci_init = icx_uncore_pci_init, @@ -1782,6 +1811,7 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = { .pci_init = spr_uncore_pci_init, .mmio_init = spr_uncore_mmio_init, .use_discovery = true, + .uncore_units_ignore = spr_uncore_units_ignore, }; static const struct intel_uncore_init_fun generic_uncore_init __initconst = { @@ -1832,6 +1862,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &mtl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), @@ -1853,7 +1885,7 @@ static int __init intel_uncore_init(void) id = x86_match_cpu(intel_uncore_match); if (!id) { - if (!uncore_no_discover && intel_uncore_has_discovery_tables()) + if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL)) uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; else return -ENODEV; @@ -1861,7 +1893,8 @@ static int __init intel_uncore_init(void) uncore_init = (struct intel_uncore_init_fun *)id->driver_data; if (uncore_no_discover && uncore_init->use_discovery) return -ENODEV; - if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables()) + if (uncore_init->use_discovery && + !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) return -ENODEV; } diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index e278e2e7c051..c30fb5bb1222 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -34,6 +34,8 @@ #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) +#define UNCORE_IGNORE_END -1 + struct pci_extra_dev { struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX]; }; @@ -208,6 +210,7 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); int uncore_pcibus_to_dieid(struct pci_bus *bus); int uncore_die_to_segment(int die); +int uncore_device_to_die(struct pci_dev *dev); ssize_t uncore_event_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -589,6 +592,7 @@ extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; +extern int spr_uncore_units_ignore[]; /* uncore_snb.c */ int snb_uncore_pci_init(void); @@ -602,6 +606,7 @@ void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); void tgl_uncore_cpu_init(void); void adl_uncore_cpu_init(void); +void mtl_uncore_cpu_init(void); void tgl_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void); void adl_uncore_mmio_init(void); diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 5fd72d4b8bbb..cb488e41807c 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -33,7 +33,7 @@ static int logical_die_id; static int get_device_die_id(struct pci_dev *dev) { - int cpu, node = pcibus_to_node(dev->bus); + int node = pcibus_to_node(dev->bus); /* * If the NUMA info is not available, assume that the logical die id is @@ -43,19 +43,7 @@ static int get_device_die_id(struct pci_dev *dev) if (node < 0) return logical_die_id++; - for_each_cpu(cpu, cpumask_of_node(node)) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (c->initialized && cpu_to_node(cpu) == node) - return c->logical_die_id; - } - - /* - * All CPUs of a node may be offlined. For this case, - * the PCI and MMIO type of uncore blocks which are - * enumerated by the device will be unavailable. - */ - return -1; + return uncore_device_to_die(dev); } #define __node_2_type(cur) \ @@ -140,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, unsigned int *box_offset, *ids; int i; - if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset)) + if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) { + pr_info("Invalid address is detected for uncore type %d box %d, " + "Disable the uncore unit.\n", + unit->box_type, unit->box_id); return; + } if (parsed) { type = search_uncore_discovery_type(unit->box_type); - if (WARN_ON_ONCE(!type)) + if (!type) { + pr_info("A spurious uncore type %d is detected, " + "Disable the uncore type.\n", + unit->box_type); return; + } /* Store the first box of each die */ if (!type->box_ctrl_die[die]) type->box_ctrl_die[die] = unit->ctl; @@ -181,8 +177,12 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, ids[i] = type->ids[i]; box_offset[i] = type->box_offset[i]; - if (WARN_ON_ONCE(unit->box_id == ids[i])) + if (unit->box_id == ids[i]) { + pr_info("Duplicate uncore type %d box ID %d is detected, " + "Drop the duplicate uncore unit.\n", + unit->box_type, unit->box_id); goto free_ids; + } } ids[i] = unit->box_id; box_offset[i] = unit->ctl - type->box_ctrl; @@ -202,8 +202,25 @@ free_box_offset: } +static bool +uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore) +{ + int i; + + if (!ignore) + return false; + + for (i = 0; ignore[i] != UNCORE_IGNORE_END ; i++) { + if (unit->box_type == ignore[i]) + return true; + } + + return false; +} + static int parse_discovery_table(struct pci_dev *dev, int die, - u32 bar_offset, bool *parsed) + u32 bar_offset, bool *parsed, + int *ignore) { struct uncore_global_discovery global; struct uncore_unit_discovery unit; @@ -258,6 +275,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die, if (unit.access_type >= UNCORE_ACCESS_MAX) continue; + if (uncore_ignore_unit(&unit, ignore)) + continue; + uncore_insert_box_info(&unit, die, *parsed); } @@ -266,7 +286,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die, return 0; } -bool intel_uncore_has_discovery_tables(void) +bool intel_uncore_has_discovery_tables(int *ignore) { u32 device, val, entry_id, bar_offset; int die, dvsec = 0, ret = true; @@ -302,7 +322,7 @@ bool intel_uncore_has_discovery_tables(void) if (die < 0) continue; - parse_discovery_table(dev, die, bar_offset, &parsed); + parse_discovery_table(dev, die, bar_offset, &parsed, ignore); } } diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index f4439357779a..6ee80ad3423e 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -21,9 +21,15 @@ /* Global discovery table size */ #define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20 -#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7) -#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff) -#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff) +#define UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET 28 +#define UNCORE_DISCOVERY_PCI_DOMAIN(data) \ + ((data >> UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) & 0x7) +#define UNCORE_DISCOVERY_PCI_BUS_OFFSET 20 +#define UNCORE_DISCOVERY_PCI_BUS(data) \ + ((data >> UNCORE_DISCOVERY_PCI_BUS_OFFSET) & 0xff) +#define UNCORE_DISCOVERY_PCI_DEVFN_OFFSET 12 +#define UNCORE_DISCOVERY_PCI_DEVFN(data) \ + ((data >> UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) & 0xff) #define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff) @@ -122,7 +128,7 @@ struct intel_uncore_discovery_type { unsigned int *box_offset; /* Box offset */ }; -bool intel_uncore_has_discovery_tables(void); +bool intel_uncore_has_discovery_tables(int *ignore); void intel_uncore_clear_discovery_tables(void); void intel_uncore_generic_uncore_cpu_init(void); int intel_uncore_generic_uncore_pci_init(void); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 1f4869227efb..7fd4334e12a1 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -109,6 +109,19 @@ #define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728 #define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729 #define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A +#define PCI_DEVICE_ID_INTEL_MTL_1_IMC 0x7d00 +#define PCI_DEVICE_ID_INTEL_MTL_2_IMC 0x7d01 +#define PCI_DEVICE_ID_INTEL_MTL_3_IMC 0x7d02 +#define PCI_DEVICE_ID_INTEL_MTL_4_IMC 0x7d05 +#define PCI_DEVICE_ID_INTEL_MTL_5_IMC 0x7d10 +#define PCI_DEVICE_ID_INTEL_MTL_6_IMC 0x7d14 +#define PCI_DEVICE_ID_INTEL_MTL_7_IMC 0x7d15 +#define PCI_DEVICE_ID_INTEL_MTL_8_IMC 0x7d16 +#define PCI_DEVICE_ID_INTEL_MTL_9_IMC 0x7d21 +#define PCI_DEVICE_ID_INTEL_MTL_10_IMC 0x7d22 +#define PCI_DEVICE_ID_INTEL_MTL_11_IMC 0x7d23 +#define PCI_DEVICE_ID_INTEL_MTL_12_IMC 0x7d24 +#define PCI_DEVICE_ID_INTEL_MTL_13_IMC 0x7d28 #define IMC_UNCORE_DEV(a) \ @@ -205,6 +218,32 @@ #define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0 #define ADL_UNC_ARB_MSR_OFFSET 0x8 +/* MTL Cbo register */ +#define MTL_UNC_CBO_0_PER_CTR0 0x2448 +#define MTL_UNC_CBO_0_PERFEVTSEL0 0x2442 + +/* MTL HAC_ARB register */ +#define MTL_UNC_HAC_ARB_CTR 0x2018 +#define MTL_UNC_HAC_ARB_CTRL 0x2012 + +/* MTL ARB register */ +#define MTL_UNC_ARB_CTR 0x2418 +#define MTL_UNC_ARB_CTRL 0x2412 + +/* MTL cNCU register */ +#define MTL_UNC_CNCU_FIXED_CTR 0x2408 +#define MTL_UNC_CNCU_FIXED_CTRL 0x2402 +#define MTL_UNC_CNCU_BOX_CTL 0x240e + +/* MTL sNCU register */ +#define MTL_UNC_SNCU_FIXED_CTR 0x2008 +#define MTL_UNC_SNCU_FIXED_CTRL 0x2002 +#define MTL_UNC_SNCU_BOX_CTL 0x200e + +/* MTL HAC_CBO register */ +#define MTL_UNC_HBO_CTR 0x2048 +#define MTL_UNC_HBO_CTRL 0x2042 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11"); @@ -598,6 +637,115 @@ void adl_uncore_cpu_init(void) uncore_msr_uncores = adl_msr_uncores; } +static struct intel_uncore_type mtl_uncore_cbox = { + .name = "cbox", + .num_counters = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_CBO_0_PER_CTR0, + .event_ctl = MTL_UNC_CBO_0_PERFEVTSEL0, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type mtl_uncore_hac_arb = { + .name = "hac_arb", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_HAC_ARB_CTR, + .event_ctl = MTL_UNC_HAC_ARB_CTRL, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type mtl_uncore_arb = { + .name = "arb", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_ARB_CTR, + .event_ctl = MTL_UNC_ARB_CTRL, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static struct intel_uncore_type mtl_uncore_hac_cbox = { + .name = "hac_cbox", + .num_counters = 2, + .num_boxes = 2, + .perf_ctr_bits = 48, + .perf_ctr = MTL_UNC_HBO_CTR, + .event_ctl = MTL_UNC_HBO_CTRL, + .event_mask = ADL_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &icl_uncore_msr_ops, + .format_group = &adl_uncore_format_group, +}; + +static void mtl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN); +} + +static struct intel_uncore_ops mtl_uncore_msr_ops = { + .init_box = mtl_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type mtl_uncore_cncu = { + .name = "cncu", + .num_counters = 1, + .num_boxes = 1, + .box_ctl = MTL_UNC_CNCU_BOX_CTL, + .fixed_ctr_bits = 48, + .fixed_ctr = MTL_UNC_CNCU_FIXED_CTR, + .fixed_ctl = MTL_UNC_CNCU_FIXED_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &mtl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type mtl_uncore_sncu = { + .name = "sncu", + .num_counters = 1, + .num_boxes = 1, + .box_ctl = MTL_UNC_SNCU_BOX_CTL, + .fixed_ctr_bits = 48, + .fixed_ctr = MTL_UNC_SNCU_FIXED_CTR, + .fixed_ctl = MTL_UNC_SNCU_FIXED_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &mtl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type *mtl_msr_uncores[] = { + &mtl_uncore_cbox, + &mtl_uncore_hac_arb, + &mtl_uncore_arb, + &mtl_uncore_hac_cbox, + &mtl_uncore_cncu, + &mtl_uncore_sncu, + NULL +}; + +void mtl_uncore_cpu_init(void) +{ + mtl_uncore_cbox.num_boxes = icl_get_cbox_num(); + uncore_msr_uncores = mtl_msr_uncores; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -1264,6 +1412,19 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = { IMC_UNCORE_DEV(RPL_23), IMC_UNCORE_DEV(RPL_24), IMC_UNCORE_DEV(RPL_25), + IMC_UNCORE_DEV(MTL_1), + IMC_UNCORE_DEV(MTL_2), + IMC_UNCORE_DEV(MTL_3), + IMC_UNCORE_DEV(MTL_4), + IMC_UNCORE_DEV(MTL_5), + IMC_UNCORE_DEV(MTL_6), + IMC_UNCORE_DEV(MTL_7), + IMC_UNCORE_DEV(MTL_8), + IMC_UNCORE_DEV(MTL_9), + IMC_UNCORE_DEV(MTL_10), + IMC_UNCORE_DEV(MTL_11), + IMC_UNCORE_DEV(MTL_12), + IMC_UNCORE_DEV(MTL_13), { /* end: all zeroes */ } }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 44c2f879f708..7d1199554fe3 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1453,9 +1453,6 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool } raw_spin_unlock(&pci2phy_map_lock); } else { - int node = pcibus_to_node(ubox_dev->bus); - int cpu; - segment = pci_domain_nr(ubox_dev->bus); raw_spin_lock(&pci2phy_map_lock); map = __find_pci2phy_map(segment); @@ -1465,15 +1462,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool break; } - die_id = -1; - for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) { - struct cpuinfo_x86 *c = &cpu_data(cpu); + map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev); - if (c->initialized && cpu_to_node(cpu) == node) { - map->pbus_to_dieid[bus] = die_id = c->logical_die_id; - break; - } - } raw_spin_unlock(&pci2phy_map_lock); if (WARN_ON_ONCE(die_id == -1)) { @@ -6142,24 +6132,6 @@ static int spr_upi_get_topology(struct intel_uncore_type *type) return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0); } -static struct intel_uncore_type spr_uncore_upi = { - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, - .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, - .format_group = &spr_uncore_raw_format_group, - .ops = &spr_uncore_pci_ops, - .name = "upi", - .attr_update = spr_upi_attr_update, - .get_topology = spr_upi_get_topology, - .set_mapping = spr_upi_set_mapping, - .cleanup_mapping = spr_upi_cleanup_mapping, -}; - -static struct intel_uncore_type spr_uncore_m3upi = { - SPR_UNCORE_PCI_COMMON_FORMAT(), - .name = "m3upi", - .constraints = icx_uncore_m3upi_constraints, -}; - static struct intel_uncore_type spr_uncore_mdf = { SPR_UNCORE_COMMON_FORMAT(), .name = "mdf", @@ -6168,7 +6140,13 @@ static struct intel_uncore_type spr_uncore_mdf = { #define UNCORE_SPR_NUM_UNCORE_TYPES 12 #define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IMC 6 +#define UNCORE_SPR_UPI 8 +#define UNCORE_SPR_M3UPI 9 +/* + * The uncore units, which are supported by the discovery table, + * are defined here. + */ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { &spr_uncore_chabox, &spr_uncore_iio, @@ -6178,12 +6156,56 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { NULL, &spr_uncore_imc, &spr_uncore_m2m, - &spr_uncore_upi, - &spr_uncore_m3upi, + NULL, + NULL, NULL, &spr_uncore_mdf, }; +/* + * The uncore units, which are not supported by the discovery table, + * are implemented from here. + */ +#define SPR_UNCORE_UPI_NUM_BOXES 4 + +static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { + 0, 0x8000, 0x10000, 0x18000 +}; + +static struct intel_uncore_type spr_uncore_upi = { + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .format_group = &spr_uncore_raw_format_group, + .ops = &spr_uncore_pci_ops, + .name = "upi", + .attr_update = spr_upi_attr_update, + .get_topology = spr_upi_get_topology, + .set_mapping = spr_upi_set_mapping, + .cleanup_mapping = spr_upi_cleanup_mapping, + .type_id = UNCORE_SPR_UPI, + .num_counters = 4, + .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, + .perf_ctr_bits = 48, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0, + .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, + .pci_offsets = spr_upi_pci_offsets, +}; + +static struct intel_uncore_type spr_uncore_m3upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "m3upi", + .type_id = UNCORE_SPR_M3UPI, + .num_counters = 4, + .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, + .perf_ctr_bits = 48, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, + .pci_offsets = spr_upi_pci_offsets, + .constraints = icx_uncore_m3upi_constraints, +}; + enum perf_uncore_spr_iio_freerunning_type_id { SPR_IIO_MSR_IOCLK, SPR_IIO_MSR_BW_IN, @@ -6314,6 +6336,7 @@ static struct intel_uncore_type spr_uncore_imc_free_running = { #define UNCORE_SPR_MSR_EXTRA_UNCORES 1 #define UNCORE_SPR_MMIO_EXTRA_UNCORES 1 +#define UNCORE_SPR_PCI_EXTRA_UNCORES 2 static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = { &spr_uncore_iio_free_running, @@ -6323,6 +6346,17 @@ static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] &spr_uncore_imc_free_running, }; +static struct intel_uncore_type *spr_pci_uncores[UNCORE_SPR_PCI_EXTRA_UNCORES] = { + &spr_uncore_upi, + &spr_uncore_m3upi +}; + +int spr_uncore_units_ignore[] = { + UNCORE_SPR_UPI, + UNCORE_SPR_M3UPI, + UNCORE_IGNORE_END +}; + static void uncore_type_customized_copy(struct intel_uncore_type *to_type, struct intel_uncore_type *from_type) { @@ -6423,9 +6457,69 @@ void spr_uncore_cpu_init(void) spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); } +#define SPR_UNCORE_UPI_PCIID 0x3241 +#define SPR_UNCORE_UPI0_DEVFN 0x9 +#define SPR_UNCORE_M3UPI_PCIID 0x3246 +#define SPR_UNCORE_M3UPI0_DEVFN 0x29 + +static void spr_update_device_location(int type_id) +{ + struct intel_uncore_type *type; + struct pci_dev *dev = NULL; + u32 device, devfn; + u64 *ctls; + int die; + + if (type_id == UNCORE_SPR_UPI) { + type = &spr_uncore_upi; + device = SPR_UNCORE_UPI_PCIID; + devfn = SPR_UNCORE_UPI0_DEVFN; + } else if (type_id == UNCORE_SPR_M3UPI) { + type = &spr_uncore_m3upi; + device = SPR_UNCORE_M3UPI_PCIID; + devfn = SPR_UNCORE_M3UPI0_DEVFN; + } else + return; + + ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); + if (!ctls) { + type->num_boxes = 0; + return; + } + + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { + if (devfn != dev->devfn) + continue; + + die = uncore_device_to_die(dev); + if (die < 0) + continue; + + ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | + dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | + devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | + type->box_ctl; + } + + type->box_ctls = ctls; +} + int spr_uncore_pci_init(void) { - uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL); + /* + * The discovery table of UPI on some SPR variant is broken, + * which impacts the detection of both UPI and M3UPI uncore PMON. + * Use the pre-defined UPI and M3UPI table to replace. + * + * The accurate location, e.g., domain and BUS number, + * can only be retrieved at load time. + * Update the location of UPI and M3UPI. + */ + spr_update_device_location(UNCORE_SPR_UPI); + spr_update_device_location(UNCORE_SPR_M3UPI); + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, + UNCORE_SPR_PCI_EXTRA_UNCORES, + spr_pci_uncores); return 0; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 0e849f28a5c1..d6de4487348c 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -35,15 +35,17 @@ * per-core reg tables. */ enum extra_reg_type { - EXTRA_REG_NONE = -1, /* not used */ + EXTRA_REG_NONE = -1, /* not used */ - EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ - EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ - EXTRA_REG_LBR = 2, /* lbr_select */ - EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ - EXTRA_REG_FE = 4, /* fe_* */ + EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ + EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ + EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ + EXTRA_REG_SNOOP_0 = 5, /* snoop response 0 */ + EXTRA_REG_SNOOP_1 = 6, /* snoop response 1 */ - EXTRA_REG_MAX /* number of entries needed */ + EXTRA_REG_MAX /* number of entries needed */ }; struct event_constraint { @@ -606,6 +608,7 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; + u64 pebs_timing_info:1; u64 anythread_deprecated:1; }; u64 capabilities; @@ -647,6 +650,7 @@ enum { }; #define PERF_PEBS_DATA_SOURCE_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) struct x86_hybrid_pmu { struct pmu pmu; @@ -1000,6 +1004,7 @@ do { \ #define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */ #define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ +#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -1486,6 +1491,8 @@ int intel_pmu_drain_bts_buffer(void); u64 adl_latency_data_small(struct perf_event *event, u64 status); +u64 mtl_latency_data_small(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; @@ -1597,6 +1604,8 @@ void intel_pmu_pebs_data_source_adl(void); void intel_pmu_pebs_data_source_grt(void); +void intel_pmu_pebs_data_source_mtl(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c index 949d845c922b..3e9acdaeed1e 100644 --- a/arch/x86/events/zhaoxin/core.c +++ b/arch/x86/events/zhaoxin/core.c @@ -541,7 +541,13 @@ __init int zhaoxin_pmu_init(void) switch (boot_cpu_data.x86) { case 0x06: - if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) { + /* + * Support Zhaoxin CPU from ZXC series, exclude Nano series through FMS. + * Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D] + * ZXC FMS: Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3 + */ + if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) || + boot_cpu_data.x86_model == 0x19) { x86_pmu.max_period = x86_pmu.cntval_mask >> 1; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8f39c46197b8..dba52f43c8a1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -312,6 +312,7 @@ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d3fe82c5d6b6..c88dd9d3fa08 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -189,6 +189,9 @@ #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af +#define MSR_SNOOP_RSP_0 0x00001328 +#define MSR_SNOOP_RSP_1 0x00001329 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 5d0f6891ae61..6496bdbcac98 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -160,6 +160,14 @@ union cpuid10_edx { }; /* + * Intel "Architectural Performance Monitoring extension" CPUID + * detection/enumeration details: + */ +#define ARCH_PERFMON_EXT_LEAF 0x00000023 +#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1 +#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 + +/* * Intel Architectural LBR CPUID detection/enumeration details: */ union cpuid28_eax { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 695873c0f50b..754da3b226f9 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -659,17 +659,19 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn) * is determined by the MOD/RM byte. */ opcode = insn->modrm.bytes[0]; - if ((opcode & 0x30) == 0x10) { - if ((opcode & 0x8) == 0x8) - return -EOPNOTSUPP; /* far call */ - /* call absolute, indirect */ + switch (X86_MODRM_REG(opcode)) { + case 0b010: /* FF /2, call near, absolute indirect */ p->ainsn.emulate_op = kprobe_emulate_call_indirect; - } else if ((opcode & 0x30) == 0x20) { - if ((opcode & 0x8) == 0x8) - return -EOPNOTSUPP; /* far jmp */ - /* jmp near absolute indirect */ + break; + case 0b100: /* FF /4, jmp near, absolute indirect */ p->ainsn.emulate_op = kprobe_emulate_jmp_indirect; - } else + break; + case 0b011: /* FF /3, call far, absolute indirect */ + case 0b101: /* FF /5, jmp far, absolute indirect */ + return -EOPNOTSUPP; + } + + if (!p->ainsn.emulate_op) break; if (insn->addr_bytes != sizeof(unsigned long)) @@ -990,20 +992,6 @@ int kprobe_int3_handler(struct pt_regs *regs) kprobe_post_process(p, regs, kcb); return 1; } - } - - if (*addr != INT3_INSN_OPCODE) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - * Back up over the (now missing) int3 and run - * the original instruction. - */ - regs->ip = (unsigned long)addr; - return 1; } /* else: not a kprobe fault; let the kernel handle it */ return 0; |