perf vendor events: Update Intel icelake

author Ian Rogers <irogers@google.com> Wed, 27 Jul 2022 22:08:14 +0000 (15:08 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com> Thu, 28 Jul 2022 19:07:50 +0000 (16:07 -0300)
diff --git a/tools/perf/pmu-events/arch/x86/icelake/cache.json b/tools/perf/pmu-events/arch/x86/icelake/cache.json

index 9989f3338f0a212d77b0486dc3ccf72260596d8e..b4f28f24ee63df364f2cbc67cc75863432867d5a 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/icelake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/cache.json
@@ -303,7 +303,7 @@
          "UMask": "0x41"
      },
      {
-        "BriefDescription": "All retired load instructions.",
+        "BriefDescription": "Retired load instructions.",
          "CollectPEBSRecord": "2",
          "Counter": "0,1,2,3",
          "Data_LA": "1",
@@ -311,12 +311,12 @@
          "EventName": "MEM_INST_RETIRED.ALL_LOADS",
          "PEBS": "1",
          "PEBScounters": "0,1,2,3",
-        "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions for loads.",
+        "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.",
          "SampleAfterValue": "1000003",
          "UMask": "0x81"
      },
      {
-        "BriefDescription": "All retired store instructions.",
+        "BriefDescription": "Retired store instructions.",
          "CollectPEBSRecord": "2",
          "Counter": "0,1,2,3",
          "Data_LA": "1",
@@ -325,7 +325,7 @@
          "L1_Hit_Indication": "1",
          "PEBS": "1",
          "PEBScounters": "0,1,2,3",
-        "PublicDescription": "Counts all retired store instructions. This event account for SW prefetch instructions and PREFETCHW instruction for stores.",
+        "PublicDescription": "Counts all retired store instructions.",
          "SampleAfterValue": "1000003",
          "UMask": "0x82"
      },
diff --git a/tools/perf/pmu-events/arch/x86/icelake/floating-point.json b/tools/perf/pmu-events/arch/x86/icelake/floating-point.json

index 4347e2d0d0904da1eb82b03f827134d710692b2e..1925388969bba997974e36253a8b3918bf7ea131 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/icelake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/floating-point.json
@@ -99,4 +99,4 @@
          "SampleAfterValue": "100003",
          "UMask": "0x2"
      }
-]
-\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelake/frontend.json b/tools/perf/pmu-events/arch/x86/icelake/frontend.json

index b510dd5d80da3c6e68b833c377333944f221bfd9..739361d3f52f22f519b98da18f1e818b83e084b2 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/icelake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/frontend.json
@@ -494,4 +494,4 @@
          "Speculative": "1",
          "UMask": "0x1"
      }
-]
-\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json

index 622c392f59be32f2886c24ffe877736f66a929c3..f0356d66a9271412f0314259319b5cc4f7aac264 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -38,7 +38,7 @@
      {
          "BriefDescription": "Fraction of Physical Core issue-slots utilized by this Logical Processor",
          "MetricExpr": "TOPDOWN.SLOTS / ( TOPDOWN.SLOTS / 2 ) if #SMT_on else 1",
-        "MetricGroup": "SMT",
+        "MetricGroup": "SMT;TmaL1",
          "MetricName": "Slots_Utilization"
      },
      {
@@ -61,24 +61,18 @@
          "MetricName": "FLOPc"
      },
      {
-        "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
+        "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
          "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.DISTRIBUTED )",
          "MetricGroup": "Cor;Flops;HPC",
          "MetricName": "FP_Arith_Utilization",
-        "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
+        "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
      },
      {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
          "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
          "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
          "MetricName": "ILP"
      },
-    {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BadSpec;BrMispredicts",
-        "MetricName": "IpMispredict"
-    },
      {
          "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
          "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
@@ -169,12 +163,24 @@
          "MetricName": "IpArith_AVX512",
          "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
      },
+    {
+        "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/PREFETCHW) (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / cpu@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
+        "MetricGroup": "Prefetches",
+        "MetricName": "IpSWPF"
+    },
      {
          "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
          "MetricExpr": "INST_RETIRED.ANY",
          "MetricGroup": "Summary;TmaL1",
          "MetricName": "Instructions"
      },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
+        "MetricName": "Execute"
+    },
      {
          "BriefDescription": "Average number of Uops issued by front-end when it issued something",
          "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
@@ -194,11 +200,23 @@
          "MetricName": "DSB_Coverage"
      },
      {
-        "BriefDescription": "Number of Instructions per non-speculative DSB miss",
+        "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
+        "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+        "MetricGroup": "DSBmiss",
+        "MetricName": "DSB_Switch_Cost"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
          "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
          "MetricGroup": "DSBmiss;Fed",
          "MetricName": "IpDSB_Miss_Ret"
      },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Bad;BadSpec;BrMispredicts",
+        "MetricName": "IpMispredict"
+    },
      {
          "BriefDescription": "Fraction of branches that are non-taken conditionals",
          "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
@@ -230,11 +248,10 @@
          "MetricName": "Other_Branches"
      },
      {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
          "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
          "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "Load_Miss_Real_Latency",
-        "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
+        "MetricName": "Load_Miss_Real_Latency"
      },
      {
          "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
@@ -242,30 +259,6 @@
          "MetricGroup": "Mem;MemoryBound;MemoryBW",
          "MetricName": "MLP"
      },
-    {
-        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "L1D_Cache_Fill_BW"
-    },
-    {
-        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "L2_Cache_Fill_BW"
-    },
-    {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "L3_Cache_Fill_BW"
-    },
-    {
-        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "L3_Cache_Access_BW"
-    },
      {
          "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
          "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
@@ -285,13 +278,13 @@
          "MetricName": "L2MPKI"
      },
      {
-        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+        "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
          "MetricExpr": "1000 * ( ( OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD ) + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS ) / INST_RETIRED.ANY",
          "MetricGroup": "Mem;CacheMisses;Offcore",
          "MetricName": "L2MPKI_All"
      },
      {
-        "BriefDescription": "L2 cache misses per kilo instruction for all demand loads  (including speculative)",
+        "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
          "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
          "MetricGroup": "Mem;CacheMisses",
          "MetricName": "L2MPKI_Load"
@@ -309,7 +302,7 @@
          "MetricName": "L3MPKI"
      },
      {
-        "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads",
+        "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
          "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
          "MetricGroup": "Mem;CacheMisses",
          "MetricName": "FB_HPKI"
@@ -321,6 +314,54 @@
          "MetricGroup": "Mem;MemoryTLB",
          "MetricName": "Page_Walks_Utilization"
      },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L1D_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L2_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L3_Cache_Fill_BW"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "L3_Cache_Access_BW"
+    },
+    {
+        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L1D_Cache_Fill_BW_1T"
+    },
+    {
+        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L2_Cache_Fill_BW_1T"
+    },
+    {
+        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L3_Cache_Fill_BW_1T"
+    },
+    {
+        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "(64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time)",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "L3_Cache_Access_BW_1T"
+    },
      {
          "BriefDescription": "Average CPU Utilization",
          "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
@@ -337,7 +378,8 @@
          "BriefDescription": "Giga Floating Point Operations Per Second",
          "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
          "MetricGroup": "Cor;Flops;HPC",
-        "MetricName": "GFLOPs"
+        "MetricName": "GFLOPs",
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
      },
      {
          "BriefDescription": "Average Frequency Utilization relative nominal frequency",
diff --git a/tools/perf/pmu-events/arch/x86/icelake/uncore-other.json b/tools/perf/pmu-events/arch/x86/icelake/uncore-other.json

new file mode 100644 (file)

index 0000000..e007b97
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/icelake/uncore-other.json
@@ -0,0 +1,31 @@
+[
+    {
+        "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop,  etc.",
+        "Counter": "1",
+        "EventCode": "0x84",
+        "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop,  etc.",
+        "UMask": "0x01",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Total number of all outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+        "Counter": "1",
+        "EventCode": "0x81",
+        "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Total number of all outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+        "UMask": "0x01",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "UNC_CLOCK.SOCKET",
+        "Counter": "FIXED",
+        "EventCode": "0xff",
+        "EventName": "UNC_CLOCK.SOCKET",
+        "PerPkg": "1",
+        "PublicDescription": "UNC_CLOCK.SOCKET",
+        "Unit": "CLOCK"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json

index a006fd7f7b187677da879d68a18b7ee1280a21d1..58809e16bf982199b51ba51960144642b34b42c4 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json
@@ -242,4 +242,4 @@
          "Speculative": "1",
          "UMask": "0x20"
      }
-]
-\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv

index b602d2da1b89abe0a73a6e047242a22598e7b250..898e507288a2e2203ba79548300dd72ba5fce3dd 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -10,6 +10,7 @@ GenuineIntel-6-5[CF],v13,goldmont,core
  GenuineIntel-6-7A,v1.01,goldmontplus,core
  GenuineIntel-6-(3C|45|46),v31,haswell,core
  GenuineIntel-6-3F,v25,haswellx,core
+GenuineIntel-6-(7D|7E|A7),v1.14,icelake,core
  GenuineIntel-6-3A,v18,ivybridge,core
  GenuineIntel-6-3E,v19,ivytown,core
  GenuineIntel-6-2D,v20,jaketown,core
@@ -29,10 +30,7 @@ GenuineIntel-6-2C,v2,westmereep-dp,core
  GenuineIntel-6-25,v2,westmereep-sp,core
  GenuineIntel-6-2F,v2,westmereex,core
  GenuineIntel-6-55-[01234],v1,skylakex,core
-GenuineIntel-6-7D,v1,icelake,core
-GenuineIntel-6-7E,v1,icelake,core
  GenuineIntel-6-8[CD],v1,tigerlake,core
-GenuineIntel-6-A7,v1,icelake,core
  GenuineIntel-6-6A,v1,icelakex,core
  GenuineIntel-6-6C,v1,icelakex,core
  GenuineIntel-6-86,v1,snowridgex,core
author	Ian Rogers <irogers@google.com>
	Wed, 27 Jul 2022 22:08:14 +0000 (15:08 -0700)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Thu, 28 Jul 2022 19:07:50 +0000 (16:07 -0300)
tools/perf/pmu-events/arch/x86/icelake/cache.json		patch | blob | history
tools/perf/pmu-events/arch/x86/icelake/floating-point.json		patch | blob | history
tools/perf/pmu-events/arch/x86/icelake/frontend.json		patch | blob | history
tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json		patch | blob | history
tools/perf/pmu-events/arch/x86/icelake/uncore-other.json	[new file with mode: 0644]	patch | blob
tools/perf/pmu-events/arch/x86/icelake/virtual-memory.json		patch | blob | history
tools/perf/pmu-events/arch/x86/mapfile.csv		patch | blob | history