From 1d262a85e289fdaa75923aa5eac4489e9158466f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 13 Feb 2024 17:17:57 -0800 Subject: perf vendor events intel: Update meteorlake events to v1.07 Update meteorlake events to v1.07 released in: https://github.com/intel/perfmon/commit/62517223080e46bfa9a905a1195c7febae7fdb3e Umask changed on atom mem_bound events. Adds atom events ARITH.FPDIV_ACTIVE, FP_FLOPS_RETIRED.ALL, FP_FLOPS_RETIRED.DP, FP_FLOPS_RETIRED.FP32, ARITH.DIV_ACTIVE, BR_INST_RETIRED.COND, BR_INST_RETIRED.COND_TAKEN, BR_INST_RETIRED.INDIRECT, BR_INST_RETIRED.INDIRECT_CALL, BR_INST_RETIRED.IND_CALL, BR_INST_RETIRED.NEAR_RETURN, DTLB_LOAD_MISSES.WALK_COMPLETED_4K, DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M, DTLB_STORE_MISSES.WALK_COMPLETED_4K, ITLB_MISSES.WALK_COMPLETED_4K, and alias events. Event json automatically generated by: https://github.com/intel/perfmon/blob/main/scripts/create_perf_json.py Signed-off-by: Ian Rogers Reviewed-by: Kan Liang Cc: Stephane Eranian Cc: Caleb Biggers Cc: Edward Baker Cc: Perry Taylor Cc: Samantha Alt Cc: Weilin Wang Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240214011820.644458-9-irogers@google.com --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- .../perf/pmu-events/arch/x86/meteorlake/cache.json | 8 +- .../arch/x86/meteorlake/floating-point.json | 86 +++++++++++++++++++++- .../perf/pmu-events/arch/x86/meteorlake/other.json | 10 +++ .../pmu-events/arch/x86/meteorlake/pipeline.json | 76 +++++++++++++++++++ .../arch/x86/meteorlake/virtual-memory.json | 36 +++++++++ 6 files changed, 210 insertions(+), 8 deletions(-) (limited to 'tools/perf/pmu-events') diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 953e13a136a4..09145aaa0d8e 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -21,7 +21,7 @@ GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core GenuineIntel-6-(57|85),v16,knightslanding,core GenuineIntel-6-BD,v1.00,lunarlake,core -GenuineIntel-6-A[AC],v1.06,meteorlake,core +GenuineIntel-6-A[AC],v1.07,meteorlake,core GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core GenuineIntel-6-A7,v1.01,rocketlake,core diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json index 5fef87502d4b..47861a6dd8e9 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json @@ -319,7 +319,7 @@ "EventCode": "0x35", "EventName": "MEM_BOUND_STALLS_IFETCH.ALL", "SampleAfterValue": "1000003", - "UMask": "0x6f", + "UMask": "0x7f", "Unit": "cpu_atom" }, { @@ -344,7 +344,7 @@ "EventCode": "0x35", "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_MISS", "SampleAfterValue": "1000003", - "UMask": "0x68", + "UMask": "0x78", "Unit": "cpu_atom" }, { @@ -352,7 +352,7 @@ "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS_LOAD.ALL", "SampleAfterValue": "1000003", - "UMask": "0x6f", + "UMask": "0x7f", "Unit": "cpu_atom" }, { @@ -377,7 +377,7 @@ "EventCode": "0x34", "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS", "SampleAfterValue": "1000003", - "UMask": "0x68", + "UMask": "0x78", "Unit": "cpu_atom" }, { diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json index f66506ee37ef..30e604d2120f 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json @@ -1,4 +1,13 @@ [ + { + "BriefDescription": "Counts the number of cycles when any of the floating point dividers are active.", + "CounterMask": "1", + "EventCode": "0xcd", + "EventName": "ARITH.FPDIV_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "This event counts the cycles the floating point divider is busy.", "CounterMask": "1", @@ -26,7 +35,7 @@ "Unit": "cpu_core" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_0", "SampleAfterValue": "2000003", @@ -34,7 +43,7 @@ "Unit": "cpu_core" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_1", "SampleAfterValue": "2000003", @@ -42,13 +51,37 @@ "Unit": "cpu_core" }, { - "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5", + "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]", "EventCode": "0xb3", "EventName": "FP_ARITH_DISPATCHED.PORT_5", "SampleAfterValue": "2000003", "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V0", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V1", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]", + "EventCode": "0xb3", + "EventName": "FP_ARITH_DISPATCHED.V2", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, { "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "EventCode": "0xc7", @@ -130,6 +163,53 @@ "UMask": "0xfc", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of all types of floating point operations per uop with all default weighting", + "EventCode": "0xc8", + "EventName": "FP_FLOPS_RETIRED.ALL", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP64]", + "Deprecated": "1", + "EventCode": "0xc8", + "EventName": "FP_FLOPS_RETIRED.DP", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of floating point operations that produce 32 bit single precision results [This event is alias to FP_FLOPS_RETIRED.SP]", + "EventCode": "0xc8", + "EventName": "FP_FLOPS_RETIRED.FP32", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of floating point operations that produce 64 bit double precision results [This event is alias to FP_FLOPS_RETIRED.DP]", + "EventCode": "0xc8", + "EventName": "FP_FLOPS_RETIRED.FP64", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP32]", + "Deprecated": "1", + "EventCode": "0xc8", + "EventName": "FP_FLOPS_RETIRED.SP", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", "EventCode": "0xc3", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json index d55e792c0c43..7effc1f271e7 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json @@ -7,6 +7,16 @@ "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]", + "Deprecated": "1", + "EventCode": "0xe4", + "EventName": "LBR_INSERTS.ANY", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts demand data reads that have any type of response.", "EventCode": "0x2A,0x2B", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json index deaa7aba93f7..24bbfcebd2be 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json @@ -1,4 +1,13 @@ [ + { + "BriefDescription": "Counts the number of cycles when any of the dividers are active.", + "CounterMask": "1", + "EventCode": "0xcd", + "EventName": "ARITH.DIV_ACTIVE", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, { "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.", "CounterMask": "1", @@ -45,6 +54,15 @@ "SampleAfterValue": "400009", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e", + "Unit": "cpu_atom" + }, { "BriefDescription": "Conditional branch instructions retired.", "EventCode": "0xc4", @@ -65,6 +83,15 @@ "UMask": "0x10", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe", + "Unit": "cpu_atom" + }, { "BriefDescription": "Taken conditional branch instructions retired.", "EventCode": "0xc4", @@ -94,6 +121,15 @@ "UMask": "0x40", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb", + "Unit": "cpu_atom" + }, { "BriefDescription": "Indirect near branch instructions retired (excluding returns)", "EventCode": "0xc4", @@ -104,6 +140,25 @@ "UMask": "0x80", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts the number of near CALL branch instructions retired.", "EventCode": "0xc4", @@ -123,6 +178,15 @@ "UMask": "0x2", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of near RET branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7", + "Unit": "cpu_atom" + }, { "BriefDescription": "Return instructions retired.", "EventCode": "0xc4", @@ -671,6 +735,7 @@ "BriefDescription": "INST_RETIRED.MACRO_FUSED", "EventCode": "0xc0", "EventName": "INST_RETIRED.MACRO_FUSED", + "PEBS": "1", "SampleAfterValue": "2000003", "UMask": "0x10", "Unit": "cpu_core" @@ -679,6 +744,7 @@ "BriefDescription": "Retired NOP instructions.", "EventCode": "0xc0", "EventName": "INST_RETIRED.NOP", + "PEBS": "1", "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions", "SampleAfterValue": "2000003", "UMask": "0x2", @@ -697,6 +763,7 @@ "BriefDescription": "Iterations of Repeat string retired instructions.", "EventCode": "0xc0", "EventName": "INST_RETIRED.REP_ITERATION", + "PEBS": "1", "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.", "SampleAfterValue": "2000003", "UMask": "0x8", @@ -979,6 +1046,15 @@ "UMask": "0x20", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of Last Branch Record (LBR) entries. Requires LBRs to be enabled and configured in IA32_LBR_CTL. [This event is alias to LBR_INSERTS.ANY]", + "EventCode": "0xe4", + "EventName": "MISC_RETIRED.LBR_INSERTS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, { "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.", "EventCode": "0xa2", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json index 056c2a885a32..55798e64c58a 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json @@ -70,6 +70,15 @@ "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 4K page.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Page walks completed due to a demand data load to a 4K page.", "EventCode": "0x12", @@ -150,6 +159,15 @@ "UMask": "0x8", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, { "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.", "EventCode": "0x13", @@ -159,6 +177,15 @@ "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 4K page.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Page walks completed due to a demand data store to a 4K page.", "EventCode": "0x13", @@ -257,6 +284,15 @@ "UMask": "0x4", "Unit": "cpu_core" }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 4K page.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, { "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "EventCode": "0x11", -- cgit v1.2.3-58-ga151